feeds: enable updating feeds at different intervals
This commit is contained in:
parent
a96704e1fa
commit
76e7969209
5 changed files with 76 additions and 40 deletions
|
|
@@ -12,7 +12,6 @@ class Feeder:
|
|||
def __init__(self, store: Store, feeds: Iterable[Feed] = None):
|
||||
self.feeds: Dict[str, Feed] = {}
|
||||
self.store: Store = store
|
||||
self.news: Mapping[FeedId, Set[PostId]] = {}
|
||||
|
||||
if feeds:
|
||||
self.add_feeds(feeds)
|
||||
|
|
@@ -22,17 +21,26 @@ class Feeder:
|
|||
self.store.sync_feeds(self.feeds)
|
||||
log.debug("Active feeds: %s", ", ".join(self.feeds.keys()))
|
||||
|
||||
async def update_all(self) -> Mapping[FeedId, Set[PostId]]:
|
||||
new_post_ids = self.news = dict(
|
||||
async def update_all(self, feed_ids=None) -> Mapping[FeedId, Set[PostId]]:
|
||||
"""Update all feeds.
|
||||
|
||||
Automatically persists any new posts in storage.
|
||||
"""
|
||||
feeds = {i: self.feeds[i] for i in feed_ids} if feed_ids else self.feeds
|
||||
new_post_ids = dict(
|
||||
zip(
|
||||
self.feeds,
|
||||
await asyncio.gather(*(self.update(id) for id in self.feeds)),
|
||||
feeds,
|
||||
await asyncio.gather(*(self._update(id) for id in feeds)),
|
||||
)
|
||||
)
|
||||
self.store.sync_feeds(self.feeds)
|
||||
self.store.sync_feeds(feeds)
|
||||
return new_post_ids
|
||||
|
||||
async def update(self, feed_id) -> Set[PostId]:
|
||||
async def _update(self, feed_id) -> Set[PostId]:
|
||||
"""Update a single feed.
|
||||
|
||||
Does not persist any changes.
|
||||
"""
|
||||
feed = self.feeds[feed_id]
|
||||
post_ids = feed.post_ids
|
||||
feed.load()
|
||||
|
|
|
|||
|
|
@@ -116,6 +116,10 @@ class Bot:
|
|||
else:
|
||||
self.plugins[name] = mod
|
||||
|
||||
log.debug(f"Active plugins: {', '.join(sorted(p for p in self.plugins))}")
|
||||
for t in self.timers:
|
||||
log.debug(f"Active timer: {t}")
|
||||
|
||||
async def _on_unknown(self, room: MatrixRoom, event: UnknownEvent):
|
||||
# See if we can transform an Unknown event into something we DO know.
|
||||
if event.type == "m.reaction":
|
||||
|
|
@@ -211,6 +215,7 @@ class Bot:
|
|||
for job in self.timers:
|
||||
if job.next is not None and job.next <= now():
|
||||
job.next = None
|
||||
log.debug(f"Job is ready: {job}")
|
||||
try:
|
||||
coro = job.func(job)
|
||||
except Exception as err:
|
||||
|
|
|
|||
|
|
@@ -7,7 +7,15 @@ from typing import *
|
|||
import feeder
|
||||
import postillon
|
||||
|
||||
from ..functions import capped_text, clamp, localizedtz, reply, send_message, strip_tags
|
||||
from ..functions import (
|
||||
capped_text,
|
||||
clamp,
|
||||
localizedtz,
|
||||
parse_period,
|
||||
reply,
|
||||
send_message,
|
||||
strip_tags,
|
||||
)
|
||||
from ..models import Job, Message
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
|
@@ -44,12 +52,25 @@ def init(bot):
|
|||
bot.shared["poststore"].connect()
|
||||
|
||||
one_minute = 60
|
||||
one_hour = 3600
|
||||
one_hour = 60 * one_minute
|
||||
ten_percent = 10 / 100
|
||||
|
||||
# Create timers to update each feed.
|
||||
# XXX we could reduce the timers by grouping feeds with the same update interval
|
||||
feedconf = bot.config.get("feeder.feeds")
|
||||
for feed in bot.shared["feeder"].feeds.values():
|
||||
if feed.active:
|
||||
every_period = feedconf.get(feed.id, {}).get("update_every")
|
||||
every_s = parse_period(every_period) if every_period else one_hour
|
||||
|
||||
async def update_feed_cb(job: Job):
|
||||
await update_feeds([feed.id], job)
|
||||
|
||||
bot.add_timer(
|
||||
title="update feeds",
|
||||
every=one_hour,
|
||||
callback=update_feeds,
|
||||
jitter=10 * one_minute,
|
||||
title=f"update feed: {feed.id}",
|
||||
every=every_s,
|
||||
callback=update_feed_cb,
|
||||
jitter=ten_percent * every_s,
|
||||
)
|
||||
|
||||
|
||||
|
|
@@ -82,13 +103,13 @@ def handle_postillon(bot, posts):
|
|||
poststore.add(postillon.split_post(post))
|
||||
|
||||
|
||||
async def update_feeds(job: Job):
|
||||
async def update_feeds(feed_ids, job: Job):
|
||||
max_posts = 2
|
||||
bot = job.app
|
||||
feeder = bot.shared["feeder"]
|
||||
feeds = bot.config.get("feeder.feeds")
|
||||
rooms = {fid: f.get("rooms", []) for fid, f in feeds.items()}
|
||||
news = await feeder.update_all()
|
||||
feedconfs = bot.config.get("feeder.feeds")
|
||||
rooms = {fid: f.get("rooms", []) for fid, f in feedconfs.items()}
|
||||
news = await feeder.update_all(feed_ids)
|
||||
sends = []
|
||||
mores = []
|
||||
for feed_id, post_ids in news.items():
|
||||
|
|
@@ -105,7 +126,7 @@ async def update_feeds(job: Job):
|
|||
post,
|
||||
tzname=roomconf["timezone"],
|
||||
lc=roomconf["locale"],
|
||||
max_content_len=feeds[feed_id].get("max_content_len", 300),
|
||||
max_content_len=feedconfs[feed_id].get("max_content_len", 300),
|
||||
)
|
||||
text = f"{prefix} {text}"
|
||||
sends.append(send_message(bot.client, room_id, html=text))
|
||||
|
|
|
|||
|
|
@@ -14,6 +14,7 @@ from ...functions import (
|
|||
ElementParser,
|
||||
capped_text,
|
||||
escape_all,
|
||||
parse_period,
|
||||
pretty_duration,
|
||||
reply,
|
||||
send_image,
|
||||
|
|
@@ -44,26 +45,6 @@ def thumbnail(url, width=182, height=268):
|
|||
return parts._replace(path=str(path)).geturl()
|
||||
|
||||
|
||||
period_re = re.compile(
|
||||
r"P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<day>\d+)D)?T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?"
|
||||
)
|
||||
|
||||
|
||||
def parse_period(s: str) -> int:
|
||||
# see https://en.wikipedia.org/wiki/ISO_8601#Durations
|
||||
seconds = {
|
||||
"year": 365 * 86400,
|
||||
"month": 30 * 86400,
|
||||
"day": 86400,
|
||||
"hour": 3600,
|
||||
"minute": 60,
|
||||
"second": 1,
|
||||
}
|
||||
if not (match := period_re.fullmatch(s)):
|
||||
return 0
|
||||
return sum(seconds[k] * int(v) for k, v in match.groupdict().items() if v)
|
||||
|
||||
|
||||
_import_image_cache = (
|
||||
{}
|
||||
) # XXX ideally we'd cache these forever (in some permanent storage)
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,6 @@
|
|||
import locale
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from collections import defaultdict
|
||||
from contextlib import contextmanager
|
||||
|
|
@@ -320,3 +321,23 @@ def escape_all(dc, escape: Callable[[str], str] = html_escape) -> None:
|
|||
setattr(dc, f.name, [escape(x) for x in getattr(dc, f.name)])
|
||||
elif get_origin(f.type) is dict and get_args(f.type)[1] is str:
|
||||
setattr(dc, f.name, {k: escape(v) for k, v in getattr(dc, f.name).items()})
|
||||
|
||||
|
||||
period_re = re.compile(
|
||||
r"P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<day>\d+)D)?T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?"
|
||||
)
|
||||
|
||||
|
||||
def parse_period(s: str) -> int:
|
||||
# see https://en.wikipedia.org/wiki/ISO_8601#Durations
|
||||
seconds = {
|
||||
"year": 365 * 86400,
|
||||
"month": 30 * 86400,
|
||||
"day": 86400,
|
||||
"hour": 3600,
|
||||
"minute": 60,
|
||||
"second": 1,
|
||||
}
|
||||
if not (match := period_re.fullmatch(s)):
|
||||
return 0
|
||||
return sum(seconds[k] * int(v) for k, v in match.groupdict().items() if v)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue