allow ElementParser to find multiple matches
This commit is contained in:
parent
76dc9c12af
commit
c798f8d776
1 changed file with 14 additions and 6 deletions
|
|
@ -281,12 +281,15 @@ def capped_text(text: str, max_len: int, mark=" […]") -> str:
|
||||||
class ElementParser(HTMLParser):
|
class ElementParser(HTMLParser):
|
||||||
"""Parse HTML for the first matching element"""
|
"""Parse HTML for the first matching element"""
|
||||||
|
|
||||||
def __init__(
    self,
    selector: Callable[[str, Mapping[str, str]], bool],
    stop_after_one: bool = True,
):
    """Create a parser that collects the text of elements picked by *selector*.

    Args:
        selector: Predicate that ``handle_starttag`` calls with the tag name
            and the attributes of each opening tag.
            NOTE(review): what happens on a truthy result is not visible in
            this view; presumably the tag becomes the active one. Confirm
            against the full ``handle_starttag`` body.
        stop_after_one: When true (the default), ``load_chunks`` stops
            feeding further chunks once at least one match is recorded.
    """
    super().__init__()
    self.selector = selector
    self.stop_after_one = stop_after_one
    # Name of the tag currently being captured, or None when idle.
    self.__active_tag = None
    # Text accumulated for the element currently being captured.
    self.__value = ""
    # Text of every completed match, in the order the closing tags arrived.
    self.values = []
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if self.selector(tag, attrs):
|
if self.selector(tag, attrs):
|
||||||
|
|
@ -294,21 +297,26 @@ class ElementParser(HTMLParser):
|
||||||
|
|
||||||
def handle_endtag(self, tag):
    """Finish the current capture when the active element's closing tag arrives.

    The text gathered so far is appended to ``self.values`` and the capture
    state is reset, so a later capture starts from an empty buffer. Closing
    tags whose name differs from the active tag are ignored.
    """
    if tag != self.__active_tag:
        return
    self.values.append(self.__value)
    # Reset the buffer so the next match does not inherit this one's text.
    self.__value = ""
    self.__active_tag = None
|
||||||
|
|
||||||
def handle_data(self, data):
    """Append *data* to the capture buffer while an element is active.

    Text that arrives while no tag is active is discarded.
    """
    # No "already done" check here: the buffer is cleared on every closing
    # tag, so text from a later match never leaks into an earlier one.
    if self.__active_tag:
        self.__value += data
|
||||||
|
|
||||||
@property
def value(self) -> Optional[str]:
    """Return the text of the first completed match, or ``None`` if there is none."""
    return self.values[0] if self.values else None
|
||||||
|
|
||||||
|
@property
def done(self) -> bool:
    """Return ``True`` once at least one match has been recorded in ``self.values``."""
    return bool(self.values)
|
||||||
|
|
||||||
def load_chunks(self, content: Iterable[str]) -> None:
    """Feed *content* to the parser one chunk at a time.

    When ``self.stop_after_one`` is true, feeding stops after the first
    chunk that leaves at least one entry in ``self.values``; otherwise every
    chunk is consumed. The check runs only between chunks, so each chunk
    that is started is always fed in full.
    """
    for chunk in content:
        self.feed(chunk)
        if self.stop_after_one and self.values:
            break
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue