58 lines
1.8 KiB
Python
58 lines
1.8 KiB
Python
|
|
import asyncio
|
||
|
|
import logging
|
||
|
|
from typing import *
|
||
|
|
|
||
|
|
from .models import Feed, FeedId, Post, PostId
|
||
|
|
from .store import Store
|
||
|
|
|
||
|
|
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class Feeder:
    """Keep a collection of feeds, reload them, and sync them to a store.

    The feeder holds its feeds in memory keyed by feed ID, hands the whole
    mapping to ``store.sync_feeds`` after feeds are added or updated, and
    remembers the post IDs found by the most recent ``update_all`` run.
    """

    def __init__(self, store: Store, feeds: Optional[Iterable[Feed]] = None):
        """Create a feeder backed by *store*.

        Args:
            store: Object the feeds are synced to and posts are read from.
            feeds: Feeds to register right away. ``None`` or an empty
                collection registers nothing and leaves the store untouched.
        """
        # Registered feeds, keyed by each feed's ``id``.
        self.feeds: Dict[str, Feed] = {}
        self.store: Store = store
        # Result of the most recent ``update_all`` call: new post IDs per feed.
        self.news: Mapping[FeedId, Set[PostId]] = {}

        if feeds:
            self.add_feeds(feeds)

    def add_feeds(self, feeds: Iterable[Feed]):
        """Register *feeds* and sync the complete feed mapping to the store.

        A feed whose ID is already registered replaces the existing entry.
        """
        self.feeds.update({feed.id: feed for feed in feeds})
        self.store.sync_feeds(self.feeds)

    async def update_all(self) -> Mapping[FeedId, Set[PostId]]:
        """Update every registered feed and sync the feeds to the store.

        Returns:
            A mapping of feed ID to the set of post IDs that are new since
            the previous load of that feed. The same mapping is kept in
            ``self.news``.
        """
        # Snapshot the IDs once so keys and gathered results line up by
        # position; ``asyncio.gather`` returns results in argument order.
        feed_ids = list(self.feeds)
        results = await asyncio.gather(
            *(self.update(feed_id) for feed_id in feed_ids)
        )
        new_post_ids = self.news = dict(zip(feed_ids, results))
        self.store.sync_feeds(self.feeds)
        return new_post_ids

    async def update(self, feed_id: FeedId) -> Set[PostId]:
        """Reload a single feed and return the IDs of its new posts.

        Args:
            feed_id: ID of a registered feed.

        Returns:
            The post IDs present after loading that were not present before.

        Raises:
            KeyError: If no feed with *feed_id* is registered.
        """
        feed = self.feeds[feed_id]
        # Take a copy: if ``load()`` mutates the feed's ID set in place, a
        # plain reference would alias the updated set and the difference
        # below would always come out empty.
        known_ids = set(feed.post_ids)
        # NOTE(review): ``load()`` is called synchronously inside a coroutine;
        # if it performs blocking I/O the feeds gathered by ``update_all``
        # are processed one after another -- confirm against ``Feed.load``.
        feed.load()
        return feed.post_ids - known_ids

    def posts(self, feed_id: FeedId, post_ids: Sequence[PostId]) -> Sequence[Post]:
        """Return the posts with the given IDs of one feed, read from the store."""
        return self.store.posts(feed_id, post_ids)
|
||
|
|
|
||
|
|
|
||
|
|
async def all_posts(feed_url: str, throttle: int = 10) -> AsyncIterable[Post]:
    """Iterate over every post of a paginated feed, starting at *feed_url*.

    Feeds may be spread over several pages. The Feed's regular load
    function does not follow them; this generator walks page after page
    and yields the posts of each one.

    After each page has been yielded (the last one included) the generator
    sleeps for *throttle* seconds before requesting the next page.
    """
    # Seed object whose only purpose is to point ``load_next`` at the
    # first page.
    page = Feed(id=feed_url, url="", next_url=feed_url)
    while True:
        page = page.load_next()
        if not page:
            # A falsy result from ``load_next`` ends the iteration.
            return

        log.debug(f"New feed page: {page}")
        for entry in page.posts:
            yield entry

        log.debug(f"Waiting for {throttle} seconds ...")
        await asyncio.sleep(throttle)
|