# hotdog/feeder/feeder.py
import asyncio
import logging
from typing import *
from .models import Feed, FeedId, Post, PostId
from .store import Store
log = logging.getLogger(__name__)
class Feeder:
    """Manages a collection of feeds, refreshing them and persisting
    their state (and any new posts) through a :class:`Store`.
    """

    def __init__(self, store: Store, feeds: Optional[Iterable[Feed]] = None):
        """Create a feeder backed by *store*, optionally pre-registering *feeds*.

        Note: the original annotated *feeds* as ``Iterable[Feed] = None``;
        PEP 484 deprecates this implicit Optional, so it is now explicit.
        """
        # Keyed by feed id; FeedId is presumably a str alias, since the
        # keys are str-joined in add_feeds() -- TODO confirm against models.
        self.feeds: Dict[FeedId, Feed] = {}
        self.store: Store = store
        if feeds:
            self.add_feeds(feeds)

    def add_feeds(self, feeds: Iterable[Feed]) -> None:
        """Register *feeds* (keyed by their id) and sync the full set to storage."""
        self.feeds.update({f.id: f for f in feeds})
        self.store.sync_feeds(self.feeds)
        log.debug("Active feeds: %s", ", ".join(self.feeds.keys()))

    async def update_all(
        self, feed_ids: Optional[Iterable[FeedId]] = None
    ) -> Mapping[FeedId, Set[PostId]]:
        """Update all feeds (or only *feed_ids*, when given) concurrently.

        Automatically persists any new posts in storage.

        Returns a mapping of feed id to the set of post ids that appeared
        since the previous update of that feed.
        """
        feeds = {i: self.feeds[i] for i in feed_ids} if feed_ids else self.feeds
        # asyncio.gather preserves argument order, so zipping against the
        # same iteration order of `feeds` pairs each id with its result.
        new_post_ids = dict(
            zip(
                feeds,
                # `fid` (not `id`) to avoid shadowing the builtin.
                await asyncio.gather(*(self._update(fid) for fid in feeds)),
            )
        )
        self.store.sync_feeds(feeds)
        return new_post_ids

    async def _update(self, feed_id: FeedId) -> Set[PostId]:
        """Reload a single feed and return the ids of posts new since the
        previous load. Does not persist any changes.
        """
        feed = self.feeds[feed_id]
        post_ids = feed.post_ids
        feed.load()
        return feed.post_ids - post_ids

    def posts(self, feed_id: FeedId, post_ids: Sequence[PostId]) -> Sequence[Post]:
        """Fetch the stored Post objects identified by *post_ids* for *feed_id*."""
        return self.store.posts(feed_id, post_ids)
async def all_posts(feed_url: str, throttle: int = 10) -> AsyncIterable[Post]:
    """Yield all posts from the given feed URL and all following pages.

    A feed can be split into multiple pages. The Feed's normal load
    function ignores them; this function follows the page chain and
    yields every Post from every page, waiting *throttle* seconds
    between page fetches.

    Fix over the original: the sleep used to run after EVERY page,
    including the last one, which delayed generator termination by
    *throttle* seconds for no reason. Now the wait happens only between
    consecutive page fetches.
    """
    # Seed a placeholder feed whose next_url points at the first page.
    feed = Feed(id=feed_url, url="", next_url=feed_url)
    first_page = True
    while True:
        if not first_page:
            log.debug(f"Waiting for {throttle} seconds ...")
            await asyncio.sleep(throttle)
        first_page = False
        feed = feed.load_next()
        if not feed:
            break
        log.debug(f"New feed page: {feed}")
        for post in feed.posts:
            yield post