hotdog/postillon/__main__.py

52 lines
1.4 KiB
Python
Raw Permalink Normal View History

2020-11-01 16:31:37 +01:00
import argparse
import asyncio
import logging
import os
from typing import *
import postillon
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Configure root logging once at import time. The level name comes from the
# LOGLEVEL environment variable (default "INFO"). It is upper-cased because
# the logging module only recognises upper-case level names; without this,
# LOGLEVEL=debug would abort start-up with "ValueError: Unknown level".
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=os.getenv("LOGLEVEL", "INFO").upper(),
)
async def all_posts(feed_url, throttle: int = 10) -> AsyncIterable[postillon.Post]:
    """Yield the posts of every feed page, starting at `feed_url`.

    We can't use feed's all_posts because blogger creates broken next URLs:
    each page's `next_url` is rewritten here, replacing the
    ``<FEED_URL>/-/Newsticker`` prefix with plain ``<FEED_URL>``, before the
    following page is loaded.

    Args:
        feed_url: URL of the first feed page to load.
        throttle: Seconds to sleep after each page's posts have been yielded
            (this also happens after the final page).
    """
    # Seed object that has no content of its own; it only carries the URL of
    # the first real page in `next_url`.
    page = postillon.Feed(feed_url, url="", next_url=feed_url)

    while True:
        page = page.load_next()
        if not page:
            # load_next() returned something falsy: nothing more to fetch.
            break

        log.debug(f"New feed page: {page}")

        upcoming = page.next_url
        if upcoming:
            page.next_url = upcoming.replace(
                f"{postillon.FEED_URL}/-/Newsticker", postillon.FEED_URL
            )

        for entry in page.posts:
            yield entry

        log.debug(f"Waiting for {throttle} seconds ...")
        await asyncio.sleep(throttle)
async def dump_all(dbpath: str, feed_url: str):
    """Add every post reachable from `feed_url` to the store at `dbpath`.

    Args:
        dbpath: Path handed to `postillon.Store`.
        feed_url: URL of the first feed page, handed to `all_posts`.

    Each post is passed through `postillon.split_post` before being added.
    NOTE(review): the store is connected but never explicitly closed here;
    confirm whether `postillon.Store` needs an explicit close/commit.
    """
    db = postillon.Store(dbpath)
    db.connect()

    async for entry in all_posts(feed_url):
        record = postillon.split_post(entry)
        db.add(record)
def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse the command line and dump the feed into the database.

    Options:
        --database / --db / -d: required, path to the database file.
        --feed / -f: feed URL, defaults to `postillon.FEED_URL`.

    Args:
        argv: Argument strings to parse. The default ``None`` makes argparse
            read ``sys.argv[1:]`` exactly as before; passing a list allows the
            CLI to be invoked programmatically.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--database", "--db", "-d", required=True, help="Path to database.sqlite"
    )
    parser.add_argument("--feed", "-f", default=postillon.FEED_URL, help="Feed URL")
    args = parser.parse_args(argv)

    # asyncio.run() creates an event loop, runs dump_all to completion and
    # closes the loop afterwards.
    asyncio.run(dump_all(args.database, args.feed))
# Start the CLI only when this module is the program's entry point, so that
# importing it has no side effect beyond the definitions above.
if __name__ == "__main__":
    main()