52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
|
|
import argparse
|
||
|
|
import asyncio
|
||
|
|
import logging
|
||
|
|
import os
|
||
|
|
from typing import *
|
||
|
|
|
||
|
|
import postillon
|
||
|
|
|
||
|
|
# Module-level logger; the functions in this file log through it.
log = logging.getLogger(__name__)

# Configure logging once at import time. The threshold comes from the LOGLEVEL
# environment variable (default "INFO"). The value is stripped and upper-cased
# because logging only accepts the canonical level names: LOGLEVEL=debug or an
# empty LOGLEVEL would otherwise make basicConfig raise ValueError on import.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=os.getenv("LOGLEVEL", "INFO").strip().upper() or "INFO",
)
|
||
|
|
|
||
|
|
|
||
|
|
async def all_posts(
    feed_url: str, throttle: int = 10
) -> AsyncIterable[postillon.Post]:
    """Yield every post of the feed at *feed_url*, one feed page at a time.

    We can't use feed's all_posts because blogger creates broken next URLs.
    Instead each page is fetched with ``load_next()`` and, before the next
    request, its ``next_url`` is rewritten so that
    ``{postillon.FEED_URL}/-/Newsticker`` becomes ``postillon.FEED_URL``.

    Args:
        feed_url: URL used as the ``next_url`` of the initial feed object.
        throttle: Seconds to pause between two consecutive page loads.

    Yields:
        Each item of ``feed.posts`` for every loaded page.
    """
    # Presumably a placeholder page: it only carries next_url so that the
    # first load_next() call fetches feed_url itself.
    feed = postillon.Feed(feed_url, url="", next_url=feed_url)
    # NOTE(review): load_next() is called synchronously; if it performs
    # network I/O it blocks the event loop while it runs — confirm.
    while feed := feed.load_next():
        log.debug("New feed page: %s", feed)
        if feed.next_url:
            feed.next_url = feed.next_url.replace(
                f"{postillon.FEED_URL}/-/Newsticker", postillon.FEED_URL
            )
        for post in feed.posts:
            yield post
        # Throttle only when another page will be requested; pausing after
        # the last page would just delay the end of the iteration.
        if feed.next_url:
            log.debug("Waiting for %s seconds ...", throttle)
            await asyncio.sleep(throttle)
|
||
|
|
|
||
|
|
|
||
|
|
async def dump_all(dbpath: str, feed_url: str):
    """Add every post of the feed at *feed_url* to the store built on *dbpath*.

    A ``postillon.Store`` is created for *dbpath* and connected. Each post
    produced by ``all_posts(feed_url)`` is then run through
    ``postillon.split_post`` and the result is passed to the store's ``add``.
    """
    # NOTE(review): the store is connected but never explicitly closed
    # here — confirm that Store does not require it.
    database = postillon.Store(dbpath)
    database.connect()

    posts = all_posts(feed_url)
    async for entry in posts:
        record = postillon.split_post(entry)
        database.add(record)
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Parse the command line and run the feed dump.

    Options:
        --database / --db / -d: required; passed to ``dump_all`` as the
            database path.
        --feed / -f: feed URL; defaults to ``postillon.FEED_URL``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--database",
        "--db",
        "-d",
        required=True,
        help="Path to database.sqlite",
    )
    cli.add_argument(
        "--feed",
        "-f",
        default=postillon.FEED_URL,
        help="Feed URL",
    )
    options = cli.parse_args()

    # Run the async dump to completion via asyncio.run.
    job = dump_all(options.database, options.feed)
    asyncio.run(job)


# Start the dump only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|