# hotdog/feeder/feeder.py
import asyncio
import logging
from typing import *
from .models import Feed, FeedId, Post, PostId
from .store import Store
# Module-level logger named after this module, per stdlib logging convention.
log = logging.getLogger(__name__)
class Feeder:
    """Track a collection of feeds, refresh them, and expose their posts.

    Feeds live in ``self.feeds`` keyed by feed id; the backing ``Store`` is
    re-synced whenever the feed set is extended or refreshed.
    """

    def __init__(self, store: Store, feeds: Optional[Iterable[Feed]] = None):
        """Create a feeder backed by *store*, optionally seeded with *feeds*.

        Note: the default is ``None`` (not a mutable default), and the
        annotation says so explicitly (PEP 484).
        """
        # Known feeds keyed by FeedId (the original Dict[str, Feed] annotation
        # was inconsistent with the FeedId keys used everywhere else).
        self.feeds: Dict[FeedId, Feed] = {}
        self.store: Store = store
        # Result of the most recent update_all(): new post ids per feed.
        # Annotated Dict (not Mapping) because update_all() reassigns it.
        self.news: Dict[FeedId, Set[PostId]] = {}
        if feeds:
            self.add_feeds(feeds)

    def add_feeds(self, feeds: Iterable[Feed]) -> None:
        """Register *feeds* (replacing any with the same id) and persist all feeds."""
        self.feeds.update({f.id: f for f in feeds})
        self.store.sync_feeds(self.feeds)

    async def update_all(self) -> Mapping[FeedId, Set[PostId]]:
        """Refresh every feed concurrently.

        Returns a mapping from feed id to the set of post ids that are new
        since the previous load; the same mapping is cached on ``self.news``.
        """
        # zip() is safe here: self.feeds is not mutated between the two
        # iterations, so the keys line up with gather()'s ordered results.
        new_post_ids = self.news = dict(
            zip(
                self.feeds,
                await asyncio.gather(
                    *(self.update(feed_id) for feed_id in self.feeds)
                ),
            )
        )
        self.store.sync_feeds(self.feeds)
        return new_post_ids

    async def update(self, feed_id: FeedId) -> Set[PostId]:
        """Reload one feed and return the ids of posts added by the reload.

        Assumes Feed.load() rebinds ``post_ids`` rather than mutating the
        same set in place (otherwise the diff would be empty) — matches the
        original code's expectation; confirm against Feed.
        """
        feed = self.feeds[feed_id]
        post_ids = feed.post_ids
        feed.load()
        return feed.post_ids - post_ids

    def posts(self, feed_id: FeedId, post_ids: Sequence[PostId]) -> Sequence[Post]:
        """Fetch full Post objects for *post_ids* of feed *feed_id* from the store."""
        return self.store.posts(feed_id, post_ids)
async def all_posts(feed_url: str, throttle: int = 10) -> AsyncIterable[Post]:
    """Asynchronously yield every post reachable from *feed_url*.

    Feeds may be paginated; ``Feed.load()`` stops at the first page, so this
    generator walks ``load_next()`` page by page and yields each page's
    posts, sleeping *throttle* seconds between page fetches.
    """
    page = Feed(id=feed_url, url="", next_url=feed_url)
    while True:
        page = page.load_next()
        if not page:
            break
        log.debug(f"New feed page: {page}")
        for post in page.posts:
            yield post
        log.debug(f"Waiting for {throttle} seconds ...")
        await asyncio.sleep(throttle)