"""Minimal HTML tree model and query helpers built on ``html.parser``.

``parse_html`` turns a markup string into a tree of ``HtmlNode`` objects;
the module-level functions (``find``, ``text``, ``next``, ``prev``, ...)
navigate and query that tree.
"""

# NOTE(review): reconstructed from a whitespace-collapsed patch that adds
# hotdog/html.py.  The same patch changes hotdog/bot.py to build messages
# with ``Message(self, room, event)`` and makes hotdog/models.py import
# ``HtmlDocument`` and ``parse_html`` from this module.

import html.parser
import typing
from dataclasses import dataclass, field

# Elements that never have content or an end tag.  They must not become the
# "current" node, otherwise everything following e.g. ``<br>`` would be
# nested inside it.
_VOID_ELEMENTS = frozenset(
    {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
    }
)


class HtmlLookupError(AttributeError, IndexError):
    """Raised when a node has no child element with the requested name.

    It is an ``AttributeError`` so that ``hasattr`` and ``getattr`` with a
    default work as usual, and an ``IndexError`` so that existing callers
    catching the previously raised type keep working.
    """


@dataclass
class HtmlNode:
    """Base class of all tree nodes.

    Child elements can be reached by attribute access, with ``-`` in tag
    names written as ``_`` (``node.my_tag`` finds ``<my-tag>``).
    """

    # The root node is its own parent.  Excluded from comparison: comparing
    # parents would walk up to the self-referencing root and never finish.
    parent: "HtmlNode" = field(repr=False, compare=False)
    children: typing.List["HtmlNode"]

    def __getattr__(self, name):
        """Return the first direct child element named *name*.

        Raises:
            HtmlLookupError: if there is no such child element.
        """
        # Read ``children`` straight from the instance dict: going through
        # normal attribute access would re-enter __getattr__ forever on a
        # half-initialised instance (e.g. while being copied).
        for child in self.__dict__.get("children", ()):
            if isinstance(child, HtmlElement) and child.name.replace("-", "_") == name:
                return child
        raise HtmlLookupError(f"No such element: {name}")


@dataclass(init=False)
class HtmlDocument(HtmlNode):
    """Root of a parsed document; it is its own parent."""

    def __init__(self):
        super().__init__(parent=self, children=[])


@dataclass
class HtmlElement(HtmlNode):
    """An element such as ``<p class="x">``; ``element["class"]`` reads attributes."""

    name: str
    # Valueless attributes (``<input disabled>``) are stored as ``None``.
    attrs: typing.Mapping[str, typing.Optional[str]]

    def __getitem__(self, name):
        """Return the value of attribute *name*; raises ``KeyError`` if absent."""
        return self.attrs[name]


@dataclass
class HtmlText(HtmlNode):
    """A run of character data.  Text nodes never have children."""

    content: str
    children: typing.List[HtmlNode] = field(
        repr=False, init=False, default_factory=list
    )


class HtmlParser(html.parser.HTMLParser):
    """Build an ``HtmlDocument`` tree from the markup fed to the parser."""

    def __init__(self):
        super().__init__()
        self.document = HtmlDocument()
        # The node that newly parsed children are appended to.
        self.__current = self.document

    def handle_starttag(self, tag, attrs):
        """Append a new element and, unless it is a void element, descend into it."""
        parent = self.__current
        element = HtmlElement(name=tag, attrs=dict(attrs), parent=parent, children=[])
        parent.children.append(element)
        if tag not in _VOID_ELEMENTS:
            self.__current = element

    def handle_endtag(self, tag):
        """Close the nearest open element named *tag*.

        Elements left open in between are closed implicitly.  An end tag
        without a matching open element is ignored instead of unwinding
        the whole stack to the root.
        """
        node = self.__current
        while not is_root(node):
            if node.name == tag:
                self.__current = node.parent
                return
            node = node.parent

    def handle_data(self, data):
        """Append text to the current node, merging adjacent text chunks."""
        cur = self.__current
        if cur.children and isinstance(cur.children[-1], HtmlText):
            cur.children[-1].content += data
        else:
            cur.children.append(HtmlText(parent=cur, content=data))


def is_root(node: HtmlNode) -> bool:
    """Return whether *node* is a tree root, i.e. its own parent."""
    return node is node.parent


def root(node: HtmlNode) -> HtmlNode:
    """Return the root of the tree that *node* belongs to."""
    el = node
    while not is_root(el):
        el = el.parent
    return el


# Keep the builtin reachable: this module defines its own ``next`` below.
iter_next = next


def next(node: HtmlNode) -> typing.Optional[HtmlNode]:
    """Return the sibling following *node*, or ``None`` if there is none."""
    if is_root(node):
        return None
    siblings = iter(node.parent.children)
    for candidate in siblings:
        if candidate is node:
            return iter_next(siblings, None)
    return None  # node is no longer among its parent's children


def prev(node: HtmlNode) -> typing.Optional[HtmlNode]:
    """Return the sibling preceding *node*, or ``None`` if there is none."""
    if is_root(node):
        return None
    before = None
    for candidate in node.parent.children:
        if candidate is node:
            return before
        before = candidate
    return None  # node is no longer among its parent's children


def text(node: HtmlNode) -> str:
    """Return the concatenated text content of *node* and its descendants."""
    parts = []
    # Explicit stack instead of recursion, so deeply nested markup cannot
    # exhaust the interpreter's recursion limit.
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, HtmlText):
            parts.append(current.content)
        else:
            pending.extend(reversed(current.children))
    return "".join(parts)


def _iter_matches(node, pred):
    """Yield matching nodes in document order, not descending into matches."""
    pending = [node]
    while pending:
        current = pending.pop()
        if pred(current):
            yield current
        else:
            pending.extend(reversed(current.children))


def find(
    node: HtmlNode,
    name=None,
    *,
    pred: typing.Callable[[HtmlNode], bool] = None,
    class_=None,
    id=None,
) -> typing.Iterable[HtmlNode]:
    """Yield the outermost nodes at or below *node* that match the criteria.

    Args:
        node: where the search starts; *node* itself may match.
        name: tag name an element must have.
        pred: arbitrary predicate; cannot be combined with the other criteria.
        class_: class an element must carry in its ``class`` attribute.
        id: value an element's ``id`` attribute must have.

    When several of *name*, *class_* and *id* are given, an element has to
    satisfy all of them.  Descendants of a match are not searched.

    Raises:
        ValueError: if no criterion is given, or *pred* is combined with
            another criterion.
    """
    if pred is not None:
        if name or class_ or id:
            raise ValueError("pred cannot be combined with name, class_ or id")
        return _iter_matches(node, pred)
    if not (name or class_ or id):
        raise ValueError("one of name, pred, class_ or id is required")

    def matches(n):
        if not isinstance(n, HtmlElement):
            return False
        if name and n.name != name:
            return False
        # A valueless ``class`` attribute is stored as None.
        if class_ and class_ not in (n.attrs.get("class") or "").split():
            return False
        if id and n.attrs.get("id") != id:
            return False
        return True

    return _iter_matches(node, matches)


def detach(node: HtmlNode):
    """Remove the node from its parent.

    The parent's ``children`` list is replaced rather than mutated, and
    ``node.parent`` is left unchanged.
    """
    if is_root(node):
        return
    node.parent.children = [c for c in node.parent.children if c is not node]


def parse_html(html) -> HtmlDocument:
    """Parse the markup string *html* and return the document root."""
    parser = HtmlParser()
    parser.feed(html)
    # Flush buffered input: without close(), trailing text that contains a
    # bare "&" (e.g. "Q&A") is held back waiting for more data and is lost.
    parser.close()
    return parser.document
HtmlDocument, parse_html + JobCallback = Callable[["Job"], None] @@ -65,9 +67,9 @@ class Tokens(Tuple[str]): @dataclass class Message: app: "Bot" - text: str room: nio.rooms.MatrixRoom event: nio.events.room_events.RoomMessageText + text: str = None tokens: Tokens = ( None # The text split up into clean tokens, to be used for command handlers. ) @@ -77,6 +79,7 @@ class Message: is_for_me: bool = False # Wether the user addressed the bot using its name. command: Optional[str] = None # The command keyword issued by the user, if any. args: Optional[Tokens] = None # args will always be set if command is set + html: Optional[HtmlDocument] = None @property def sender_name(self) -> str: @@ -96,7 +99,16 @@ class Message: # return self.args or self.tokens def __post_init__(self): - self.tokens = Tokens.from_str(self.text) + plain = self.event.body + html = ( + parse_html(self.event.formatted_body) + if self.event.format == "org.matrix.custom.html" + else None + ) + + self.text = plain + self.html = html + self.tokens = Tokens.from_str(plain) self.words = self.tokens """ @@ -108,7 +120,7 @@ class Message: """ first_arg = self.tokens.str(0) - if self.text.startswith(self.app.config.command_prefix): + if plain.startswith(self.app.config.command_prefix): self.command = first_arg[len(self.app.config.command_prefix) :] self.args = self.tokens[1:] self.words = self.args