allow ElementParser to find multiple matches
This commit is contained in:
parent
76dc9c12af
commit
c798f8d776
1 changed file with 14 additions and 6 deletions
|
|
@@ -281,12 +281,15 @@ def capped_text(text: str, max_len: int, mark=" […]") -> str:
|
|||
class ElementParser(HTMLParser):
|
||||
"""Parse HTML for the first matching element"""
|
||||
|
||||
def __init__(self, selector: Callable[[str, Mapping[str, str]], bool]):
|
||||
def __init__(
|
||||
self, selector: Callable[[str, Mapping[str, str]], bool], stop_after_one=True
|
||||
):
|
||||
super().__init__()
|
||||
self.selector = selector
|
||||
self.stop_after_one = stop_after_one
|
||||
self.__active_tag = None
|
||||
self.done = False
|
||||
self.__value = ""
|
||||
self.values = []
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if self.selector(tag, attrs):
|
||||
|
|
@@ -294,21 +297,26 @@ class ElementParser(HTMLParser):
|
|||
|
||||
def handle_endtag(self, tag):
|
||||
if tag == self.__active_tag:
|
||||
self.done = True
|
||||
self.values.append(self.__value)
|
||||
self.__value = ""
|
||||
self.__active_tag = None
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.__active_tag and not self.done:
|
||||
if self.__active_tag:
|
||||
self.__value += data
|
||||
|
||||
@property
|
||||
def value(self) -> Optional[str]:
|
||||
return self.__value if self.done else None
|
||||
return self.values[0] if self.values else None
|
||||
|
||||
@property
|
||||
def done(self) -> bool:
|
||||
return bool(self.values)
|
||||
|
||||
def load_chunks(self, content: Iterable[str]) -> None:
|
||||
for chunk in content:
|
||||
self.feed(chunk)
|
||||
if self.done:
|
||||
if self.stop_after_one and self.values:
|
||||
break
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue