use async requests to refresh user ratings
parent 60d38e9b49
commit 099770c80c

2 changed files with 62 additions and 3 deletions
@@ -6,7 +6,7 @@ from urllib.parse import urljoin
 from . import db
 from .models import Movie, Rating, User
-from .request import cache_path, session, soup_from_url
+from .request import asession, asoup_from_url, cache_path

 log = logging.getLogger(__name__)

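Note: asession() itself is not defined in this diff (asoup_from_url is added further down). The build_request / send / aread calls in the new _ahttp_get suggest an httpx.AsyncClient underneath, so one plausible shape for the helper, purely as a hedged sketch, is:

from contextlib import asynccontextmanager

import httpx


@asynccontextmanager
async def asession():
    # Yield a pooled async HTTP client; callers do "async with asession() as s".
    async with httpx.AsyncClient() as client:
        yield client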
@@ -35,7 +35,7 @@ log = logging.getLogger(__name__)
 async def refresh_user_ratings_from_imdb(stop_on_dupe: bool = True):
-    with session() as s:
+    async with asession() as s:
         s.headers["Accept-Language"] = "en-US, en;q=0.5"

         for user in await db.get_all(User):

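Note: refresh_user_ratings_from_imdb() is a coroutine (it already was before this change), so its caller has to drive it on an event loop. The caller is not part of this diff; a hypothetical invocation would be:

import asyncio

asyncio.run(refresh_user_ratings_from_imdb(stop_on_dupe=True))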
@@ -152,7 +152,7 @@ ForgedRequest = namedtuple("ForgedRequest", "url headers")
 async def parse_page(url: str) -> tuple[list[Rating], str | None]:
     ratings = []

-    soup = soup_from_url(url)
+    soup = await asoup_from_url(url)

     meta = soup.find("meta", property="pageId")
     headline = soup.h1

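Note: with parse_page() now awaiting asoup_from_url(), independent ratings pages could in principle be fetched concurrently. This commit does not do that, and the _throttle decorator below still rate-limits the underlying requests; the following is only an illustrative sketch of the async API, with parse_many() being a hypothetical helper:

import asyncio


async def parse_many(urls: list[str]) -> list[tuple[list[Rating], str | None]]:
    # Fetch and parse several pages concurrently.
    return await asyncio.gather(*(parse_page(url) for url in urls))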
@@ -202,6 +202,56 @@ def _http_get(s: _Session_T, url: str, *args, **kwds) -> _Response_T:
     return resp


+@_throttle(1, 1, random)
+async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T:
+    req = s.build_request(method="GET", url=url, *args, **kwds)
+
+    cachefile = cache_path(req) if config.debug else None
+
+    if cachefile:
+        if cachefile.exists():
+            log.debug(
+                "💾 loading %s (%a) from cache %s ...", req.url, req.headers, cachefile
+            )
+            with cachefile.open() as fp:
+                resp = _CachedResponse(**json.load(fp))
+            if 300 <= resp.status_code <= 399:
+                raise _RedirectError(
+                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
+                )
+            return cast(_Response_T, resp)
+
+    log.debug("⚡️ loading %s (%a) ...", req.url, req.headers)
+    resp = await s.send(req, follow_redirects=False, stream=True)
+    resp.raise_for_status()
+
+    await resp.aread()  # Download the response stream to allow `resp.text` access.
+
+    if cachefile:
+        log.debug(
+            "💾 writing response to cache: %s (%a) -> %s",
+            req.url,
+            req.headers,
+            cachefile,
+        )
+        with cachefile.open("w") as fp:
+            json.dump(
+                {
+                    "status_code": resp.status_code,
+                    "text": resp.text,
+                    "url": str(resp.url),
+                    "headers": dict(resp.headers),
+                },
+                fp,
+            )
+
+    if resp.is_redirect:
+        # Redirects could mean trouble, we need to stay on top of that!
+        raise _RedirectError(from_url=str(resp.url), to_url=resp.headers["location"])
+
+    return resp
+
+
 def soup_from_url(url):
     """Return a BeautifulSoup instance from the contents for the given URL."""
     with session() as s:

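Note: _throttle, _CachedResponse, _RedirectError, config, and cache_path come from unchanged parts of the .request module and are not shown in this diff. Judging from the JSON payload the cache writer stores and the keyword arguments used above, the two response/error types might look roughly like this (a sketch under those assumptions, not the actual definitions):

from dataclasses import dataclass, field


@dataclass
class _CachedResponse:
    # Mirrors the fields _ahttp_get writes to the cache file.
    status_code: int
    text: str
    url: str
    headers: dict = field(default_factory=dict)


class _RedirectError(Exception):
    # Raised so callers notice when a page has moved instead of silently following it.
    def __init__(self, from_url: str, to_url: str, is_cached: bool = False):
        super().__init__(f"redirected: {from_url} -> {to_url}")
        self.from_url = from_url
        self.to_url = to_url
        self.is_cached = is_cached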
@@ -211,6 +261,15 @@ def soup_from_url(url):
     return soup


+async def asoup_from_url(url):
+    """Return a BeautifulSoup instance from the contents for the given URL."""
+    async with asession() as s:
+        r = await _ahttp_get(s, url)
+
+    soup = bs4.BeautifulSoup(r.text, "html5lib")
+    return soup
+
+
 def _last_modified_from_response(resp: _Response_T) -> float | None:
     if last_mod := resp.headers.get("last-modified"):
         try:

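Note: a quick usage sketch of the new async helper; the URL is illustrative only:

import asyncio


async def main():
    soup = await asoup_from_url("https://example.com/some-ratings-page")
    print(soup.h1)


asyncio.run(main())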