import re
from dataclasses import replace
from html.parser import HTMLParser
from io import StringIO
from typing import Iterable

from . import Post

FEED_URL = "https://www.blogger.com/feeds/746298260979647434/posts/default/-/Newsticker"


class TextonlyParser(HTMLParser):
    """HTML parser that keeps only the text content of the markup it is fed."""

    def __init__(self):
        super().__init__()
        self.reset()
        self.strict = False
        self.convert_charrefs = True
        self._text = StringIO()

    def handle_data(self, d):
        # Text nodes are collected; the tags themselves are ignored.
        self._text.write(d)

    @property
    def text(self):
        return self._text.getvalue()


def strip_tags(html):
    """Return the plain-text content of `html` with all markup removed."""
    s = TextonlyParser()
    s.feed(html)
    return s.text
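
# Example: strip_tags("<p>Hello <b>world</b>!</p>") returns "Hello world!".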


# Matches segments delimited as "+++ text +++", capturing the text between the
# markers; split_post() below uses this to break a combined post apart.
find_tags = re.compile(r"\+\+\+ (.*?) \+\+\+").finditer
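
# Example: in the text "+++ First +++ +++ Second +++" this yields two matches,
# capturing "First" and "Second".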


def feed_page(page: int = 1, per_page: int = 25) -> str:
    """Return the Blogger feed URL for the given 1-based page of results."""
    start = 1 + (page - 1) * per_page
    return f"{FEED_URL}?start-index={start}&max-results={per_page}"
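
# Example: feed_page() ends in "?start-index=1&max-results=25", while
# feed_page(page=2) ends in "?start-index=26&max-results=25".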


def split_post(post: Post) -> Iterable[Post]:
    """Yield one copy of `post` per "+++ ... +++" segment in its content."""
    for match in find_tags(strip_tags(post.content)):
        yield replace(post, content=match[1])
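
# Minimal usage sketch (illustrative; it assumes Post can be constructed from a
# `content` value alone, while dataclasses.replace above only requires that
# Post is a dataclass with a `content` field):
#
#     combined = Post(content="<p>+++ First headline +++</p> <p>+++ Second headline +++</p>")
#     for item in split_post(combined):
#         print(item.content)  # "First headline", then "Second headline"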