From 76e7969209bcf365a22403a9e74270be2515f104 Mon Sep 17 00:00:00 2001 From: ducklet Date: Thu, 12 Nov 2020 23:44:06 +0100 Subject: [PATCH] feeds: enable updating feeds at different intervals --- feeder/feeder.py | 22 ++++++++++----- hotdog/bot.py | 5 ++++ hotdog/command/feed.py | 47 ++++++++++++++++++++++++--------- hotdog/command/urlinfo_/imdb.py | 21 +-------------- hotdog/functions.py | 21 +++++++++++++++ 5 files changed, 76 insertions(+), 40 deletions(-) diff --git a/feeder/feeder.py b/feeder/feeder.py index fc0f154..85cf834 100644 --- a/feeder/feeder.py +++ b/feeder/feeder.py @@ -12,7 +12,6 @@ class Feeder: def __init__(self, store: Store, feeds: Iterable[Feed] = None): self.feeds: Dict[str, Feed] = {} self.store: Store = store - self.news: Mapping[FeedId, Set[PostId]] = {} if feeds: self.add_feeds(feeds) @@ -22,17 +21,26 @@ class Feeder: self.store.sync_feeds(self.feeds) log.debug("Active feeds: %s", ", ".join(self.feeds.keys())) - async def update_all(self) -> Mapping[FeedId, Set[PostId]]: - new_post_ids = self.news = dict( + async def update_all(self, feed_ids=None) -> Mapping[FeedId, Set[PostId]]: + """Update the feeds named in feed_ids, or all feeds if it is None or empty. + + Automatically persists any new posts in storage. + """ + feeds = {i: self.feeds[i] for i in feed_ids} if feed_ids else self.feeds + new_post_ids = dict( zip( - self.feeds, - await asyncio.gather(*(self.update(id) for id in self.feeds)), + feeds, + await asyncio.gather(*(self._update(id) for id in feeds)), ) ) - self.store.sync_feeds(self.feeds) + self.store.sync_feeds(feeds) return new_post_ids - async def update(self, feed_id) -> Set[PostId]: + async def _update(self, feed_id) -> Set[PostId]: + """Update a single feed. + + Does not persist any changes. 
+ """ feed = self.feeds[feed_id] post_ids = feed.post_ids feed.load() diff --git a/hotdog/bot.py b/hotdog/bot.py index 25d99d3..873ab42 100644 --- a/hotdog/bot.py +++ b/hotdog/bot.py @@ -116,6 +116,10 @@ class Bot: else: self.plugins[name] = mod + log.debug("Active plugins: %s", ", ".join(sorted(self.plugins))) + for t in self.timers: + log.debug("Active timer: %s", t) + async def _on_unknown(self, room: MatrixRoom, event: UnknownEvent): # See if we can transform an Unknown event into something we DO know. if event.type == "m.reaction": @@ -211,6 +215,7 @@ class Bot: for job in self.timers: if job.next is not None and job.next <= now(): job.next = None + log.debug("Job is ready: %s", job) try: coro = job.func(job) except Exception as err: diff --git a/hotdog/command/feed.py b/hotdog/command/feed.py index 408035e..4535dce 100644 --- a/hotdog/command/feed.py +++ b/hotdog/command/feed.py @@ -7,7 +7,15 @@ from typing import * import feeder import postillon -from ..functions import capped_text, clamp, localizedtz, reply, send_message, strip_tags +from ..functions import ( + capped_text, + clamp, + localizedtz, + parse_period, + reply, + send_message, + strip_tags, +) from ..models import Job, Message log = logging.getLogger(__name__) @@ -44,13 +52,26 @@ def init(bot): bot.shared["poststore"].connect() one_minute = 60 - one_hour = 3600 - bot.add_timer( - title="update feeds", - every=one_hour, - callback=update_feeds, - jitter=10 * one_minute, - ) + one_hour = 60 * one_minute + ten_percent = 10 / 100 + + # Create timers to update each feed. 
+ # XXX we could reduce the timers by grouping feeds with the same update interval + feedconf = bot.config.get("feeder.feeds") + for feed in bot.shared["feeder"].feeds.values(): + if feed.active: + every_period = feedconf.get(feed.id, {}).get("update_every") + every_s = parse_period(every_period) if every_period else one_hour + # Bind the feed id as a default argument; a plain closure would late-bind `feed` to the last loop value. + async def update_feed_cb(job: Job, feed_id=feed.id): + await update_feeds([feed_id], job) + + bot.add_timer( + title=f"update feed: {feed.id}", + every=every_s, + callback=update_feed_cb, + jitter=ten_percent * every_s, + ) async def handle(message: Message): @@ -82,13 +103,13 @@ def handle_postillon(bot, posts): poststore.add(postillon.split_post(post)) -async def update_feeds(job: Job): +async def update_feeds(feed_ids, job: Job): max_posts = 2 bot = job.app feeder = bot.shared["feeder"] - feeds = bot.config.get("feeder.feeds") - rooms = {fid: f.get("rooms", []) for fid, f in feeds.items()} - news = await feeder.update_all() + feedconfs = bot.config.get("feeder.feeds") + rooms = {fid: f.get("rooms", []) for fid, f in feedconfs.items()} + news = await feeder.update_all(feed_ids) sends = [] mores = [] for feed_id, post_ids in news.items(): @@ -105,7 +126,7 @@ async def update_feeds(job: Job): post, tzname=roomconf["timezone"], lc=roomconf["locale"], - max_content_len=feeds[feed_id].get("max_content_len", 300), + max_content_len=feedconfs[feed_id].get("max_content_len", 300), ) text = f"{prefix} {text}" sends.append(send_message(bot.client, room_id, html=text)) diff --git a/hotdog/command/urlinfo_/imdb.py b/hotdog/command/urlinfo_/imdb.py index 72bd033..8578f38 100644 --- a/hotdog/command/urlinfo_/imdb.py +++ b/hotdog/command/urlinfo_/imdb.py @@ -14,6 +14,7 @@ from ...functions import ( ElementParser, capped_text, escape_all, + parse_period, pretty_duration, reply, send_image, @@ -44,26 +45,6 @@ def thumbnail(url, width=182, height=268): return parts._replace(path=str(path)).geturl() -period_re = re.compile( - 
r"P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<day>\d+)D)?T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?" -) - - -def parse_period(s: str) -> int: - # see https://en.wikipedia.org/wiki/ISO_8601#Durations - seconds = { - "year": 365 * 86400, - "month": 30 * 86400, - "day": 86400, - "hour": 3600, - "minute": 60, - "second": 1, - } - if not (match := period_re.fullmatch(s)): - return 0 - return sum(seconds[k] * int(v) for k, v in match.groupdict().items() if v) - - _import_image_cache = ( {} ) # XXX ideally we'd cache these forever (in some permanent storage) diff --git a/hotdog/functions.py b/hotdog/functions.py index a20ac29..11093a1 100644 --- a/hotdog/functions.py +++ b/hotdog/functions.py @@ -1,5 +1,6 @@ import locale import logging +import re import unicodedata from collections import defaultdict from contextlib import contextmanager @@ -320,3 +321,23 @@ def escape_all(dc, escape: Callable[[str], str] = html_escape) -> None: setattr(dc, f.name, [escape(x) for x in getattr(dc, f.name)]) elif get_origin(f.type) is dict and get_args(f.type)[1] is str: setattr(dc, f.name, {k: escape(v) for k, v in getattr(dc, f.name).items()}) + + +period_re = re.compile( + r"P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<day>\d+)D)?T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?" +) + + +def parse_period(s: str) -> int: + # see https://en.wikipedia.org/wiki/ISO_8601#Durations + seconds = { + "year": 365 * 86400, + "month": 30 * 86400, + "day": 86400, + "hour": 3600, + "minute": 60, + "second": 1, + } + if not (match := period_re.fullmatch(s)): + return 0 + return sum(seconds[k] * int(v) for k, v in match.groupdict().items() if v)