dump current state (wip-ish)
This commit is contained in:
parent
0124c35472
commit
51fb1c9f26
46 changed files with 3749 additions and 0 deletions
44
postillon/postbox.py
Normal file
44
postillon/postbox.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import re
|
||||
from dataclasses import replace
|
||||
from html.parser import HTMLParser
|
||||
from io import StringIO
|
||||
from typing import *
|
||||
|
||||
from . import Post
|
||||
|
||||
# Base URL of the Blogger posts feed (the path ends in the "Newsticker"
# segment). feed_page() appends the pagination query parameters to it.
FEED_URL = "https://www.blogger.com/feeds/746298260979647434/posts/default/-/Newsticker"
|
||||
|
||||
|
||||
class TextonlyParser(HTMLParser):
    """HTML parser that discards markup and accumulates only text content.

    Feed it HTML with ``feed()``, call ``close()`` once all input has been
    supplied, then read the collected text from ``text``.
    """

    def __init__(self):
        # convert_charrefs=True makes the parser decode character references
        # (e.g. ``&amp;``) before passing text to handle_data().
        # HTMLParser.__init__ already calls reset(), so no explicit reset
        # is required here.
        super().__init__(convert_charrefs=True)
        self._text = StringIO()

    def handle_data(self, d):
        """Append one chunk of text found outside of tags to the buffer."""
        self._text.write(d)

    @property
    def text(self):
        """Return all text collected so far as a single string."""
        return self._text.getvalue()


def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    Character references are decoded, so ``"a &amp; b"`` yields ``"a & b"``.
    """
    s = TextonlyParser()
    s.feed(html)
    # With convert_charrefs enabled, the parser holds back trailing text that
    # might end in an incomplete character reference (an ``&`` near the end
    # not followed by whitespace or ``;``) until close() is called. Without
    # this call such text (e.g. "AT&T") would be silently dropped.
    s.close()
    return s.text
|
||||
|
||||
|
||||
# Matches text delimited by "+++ " and " +++"; group 1 captures the
# (non-greedy) text between the two delimiters.
_TAG_PATTERN = re.compile(r"\+\+\+ (.*?) \+\+\+")

# find_tags(text) yields one match object per delimited span found in text.
find_tags = _TAG_PATTERN.finditer
|
||||
|
||||
|
||||
def feed_page(page: int = 1, per_page: int = 25) -> str:
    """Build the feed URL for one page of results.

    Pages are numbered from 1. The URL carries a 1-based ``start-index``
    (the position of the page's first entry) and ``max-results`` set to
    *per_page*.
    """
    entries_before = (page - 1) * per_page
    first_index = entries_before + 1
    return f"{FEED_URL}?start-index={first_index}&max-results={per_page}"
|
||||
|
||||
|
||||
def split_post(post: Post) -> Iterable[Post]:
    """Yield one copy of *post* per "+++ ... +++" span found in its content.

    The post's content is first reduced to plain text with strip_tags();
    each yielded copy has its ``content`` replaced by the text captured
    between one pair of delimiters. All other fields are carried over
    unchanged via ``dataclasses.replace``.
    """
    plain_text = strip_tags(post.content)
    yield from (
        replace(post, content=hit[1])
        for hit in find_tags(plain_text)
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue