"""Command-line script that walks a postillon feed and stores every post."""

import argparse
import asyncio
import logging
import os
from typing import *  # NOTE(review): only AsyncIterable is used in this file

import postillon

log = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=os.getenv("LOGLEVEL", "INFO"),
)


async def all_posts(
    feed_url: str, throttle: int = 10
) -> AsyncIterable[postillon.Post]:
    """Yield every post of the feed at *feed_url*, one page at a time.

    We can't use the feed's own ``all_posts`` because blogger creates broken
    next URLs; each page's ``next_url`` is rewritten here before the next
    page is loaded.

    Args:
        feed_url: URL of the first feed page.
        throttle: Seconds to sleep after each page has been yielded.

    Yields:
        The posts of each loaded feed page, in page order.
    """
    # Seed object whose only job is to point ``load_next`` at the first page.
    feed = postillon.Feed(feed_url, url="", next_url=feed_url)
    # NOTE(review): ``load_next`` is called synchronously; if it performs
    # blocking network I/O it stalls the event loop while it runs -- confirm.
    while feed := feed.load_next():
        log.debug("New feed page: %s", feed)
        if feed.next_url:
            # Strip the "/-/Newsticker" path segment from the next-page URL
            # (the broken part mentioned in the docstring).
            feed.next_url = feed.next_url.replace(
                f"{postillon.FEED_URL}/-/Newsticker", postillon.FEED_URL
            )
        for post in feed.posts:
            yield post
        log.debug("Waiting for %s seconds ...", throttle)
        await asyncio.sleep(throttle)


async def dump_all(dbpath: str, feed_url: str, throttle: int = 10) -> None:
    """Store every post of the feed at *feed_url* in the database at *dbpath*.

    Args:
        dbpath: Path handed to ``postillon.Store``.
        feed_url: URL of the first feed page.
        throttle: Seconds to wait between feed pages (passed to ``all_posts``).
    """
    store = postillon.Store(dbpath)
    store.connect()
    # NOTE(review): the store is never explicitly closed or committed here;
    # confirm whether ``postillon.Store`` requires that.
    async for post in all_posts(feed_url, throttle):
        store.add(postillon.split_post(post))


def main() -> None:
    """Parse the command-line arguments and run the dump."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--database", "--db", "-d", required=True, help="Path to database.sqlite"
    )
    parser.add_argument("--feed", "-f", default=postillon.FEED_URL, help="Feed URL")
    parser.add_argument(
        "--throttle",
        "-t",
        type=int,
        default=10,
        help="Seconds to wait between feed pages",
    )
    args = parser.parse_args()
    asyncio.run(dump_all(args.database, args.feed, args.throttle))


if __name__ == "__main__":
    main()