allow ElementParser to find multiple matches

This commit is contained in:
ducklet 2020-11-14 14:33:12 +01:00
parent 76dc9c12af
commit c798f8d776

View file

@ -281,12 +281,15 @@ def capped_text(text: str, max_len: int, mark=" […]") -> str:
class ElementParser(HTMLParser): class ElementParser(HTMLParser):
"""Parse HTML for the first matching element""" """Parse HTML for the first matching element"""
def __init__(
    self,
    selector: Callable[[str, Mapping[str, str]], bool],
    stop_after_one: bool = True,
):
    """Create a parser that collects the text of elements accepted by *selector*.

    Args:
        selector: Predicate called from ``handle_starttag`` with the tag
            name and the attributes of a start tag.
            NOTE(review): ``HTMLParser`` hands attributes over as a list of
            ``(name, value)`` pairs rather than a mapping -- confirm the
            annotation matches what the selectors actually receive.
        stop_after_one: When true, ``load_chunks`` stops feeding as soon as
            at least one value has been collected.
    """
    super().__init__()
    self.selector = selector
    self.stop_after_one = stop_after_one
    # Tag whose end tag closes the current capture (compared in
    # ``handle_endtag``); None while nothing is being captured.
    self.__active_tag = None
    # Text accumulated so far for the element being captured.
    self.__value = ""
    # Text of every completed match, in the order the end tags were seen.
    # ``done`` is a read-only property derived from this list, so it must
    # not be assigned here.
    self.values = []
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if self.selector(tag, attrs): if self.selector(tag, attrs):
@ -294,21 +297,26 @@ class ElementParser(HTMLParser):
def handle_endtag(self, tag):
    """Finish the current capture when the end tag of the active element arrives.

    The accumulated text is appended to ``values`` and the capture state is
    reset so that a later element can be captured. End tags that do not
    match the active tag are ignored.
    """
    if tag == self.__active_tag:
        self.values.append(self.__value)
        # Start from an empty buffer for the next match.
        self.__value = ""
        self.__active_tag = None
def handle_data(self, data):
    """Append text content to the current capture, if one is active.

    Data received while no element is being captured is discarded.
    """
    if self.__active_tag:
        self.__value += data
@property
def value(self) -> Optional[str]:
    """Return the text of the first completed match, or None if there is none yet."""
    return self.values[0] if self.values else None
@property
def done(self) -> bool:
    """Return True once at least one match has been completed."""
    return bool(self.values)
def load_chunks(self, content: Iterable[str]) -> None:
    """Feed *content* to the parser chunk by chunk.

    When ``stop_after_one`` is set, feeding stops after the first chunk that
    leaves at least one entry in ``values``; the remaining chunks are not
    consumed. The check runs only between chunks, so a single chunk that
    contains several matching elements can still add more than one value.
    """
    for chunk in content:
        self.feed(chunk)
        if self.stop_after_one and self.values:
            break