dump current state (wip-ish)
This commit is contained in:
parent
0124c35472
commit
51fb1c9f26
46 changed files with 3749 additions and 0 deletions
51
postillon/__main__.py
Normal file
51
postillon/__main__.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from typing import *
|
||||
|
||||
import postillon
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Configure root logging once at import time. The level name is read from the
# LOGLEVEL environment variable (default "INFO") and upper-cased, because
# logging only recognises upper-case level names: without the normalisation a
# value such as "debug" makes basicConfig raise ValueError.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=os.getenv("LOGLEVEL", "INFO").upper(),
)
|
||||
|
||||
|
||||
async def all_posts(
    feed_url: str, throttle: int = 10
) -> AsyncIterable[postillon.Post]:
    """Yield every post of the feed starting at *feed_url*, page by page.

    We can't use feed's all_posts because blogger creates broken next URLs.
    Each loaded page's ``next_url`` is therefore rewritten, replacing
    ``<FEED_URL>/-/Newsticker`` with plain ``FEED_URL``, before the following
    page is requested.

    Args:
        feed_url: URL of the first feed page to load.
        throttle: Seconds to wait between loading two consecutive pages.

    Yields:
        The posts of each loaded page, in the order the pages are loaded.
    """
    # Seed with a placeholder feed whose "next" page is the first real page.
    feed = postillon.Feed(feed_url, url="", next_url=feed_url)
    while (feed := feed.load_next()):
        # Lazy %-style arguments: the message is only rendered when DEBUG
        # logging is actually enabled.
        log.debug("New feed page: %s", feed)
        if feed.next_url:
            feed.next_url = feed.next_url.replace(
                f"{postillon.FEED_URL}/-/Newsticker", postillon.FEED_URL
            )
        for post in feed.posts:
            yield post
        if not feed.next_url:
            # No further page is announced, so there is no request to
            # throttle. Presumably load_next() now returns a falsy value and
            # ends the loop -- TODO confirm against postillon.Feed.
            continue
        log.debug("Waiting for %s seconds ...", throttle)
        await asyncio.sleep(throttle)
|
||||
|
||||
|
||||
async def dump_all(dbpath: str, feed_url: str):
    """Add every post of the feed at *feed_url* to the store at *dbpath*.

    Creates a ``postillon.Store`` for *dbpath* and connects it, then iterates
    over ``all_posts(feed_url)`` and adds the result of
    ``postillon.split_post`` for each post to the store.
    """
    db = postillon.Store(dbpath)
    db.connect()
    # NOTE(review): the store is never explicitly closed or committed here --
    # confirm whether postillon.Store requires that.
    posts = all_posts(feed_url)
    async for entry in posts:
        pieces = postillon.split_post(entry)
        db.add(pieces)
|
||||
|
||||
|
||||
def main():
    """Parse the command line and dump the whole feed into the database.

    Options:
        --database / --db / -d: required; passed to ``dump_all`` as the
            database path.
        --feed / -f: feed URL; defaults to ``postillon.FEED_URL``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--database",
        "--db",
        "-d",
        help="Path to database.sqlite",
        required=True,
    )
    cli.add_argument("--feed", "-f", help="Feed URL", default=postillon.FEED_URL)
    options = cli.parse_args()

    # Run the async dump to completion; asyncio.run manages the event loop.
    job = dump_all(options.database, options.feed)
    asyncio.run(job)


# Only start the dump when executed as a script / via ``python -m``.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue