use async requests to refresh user ratings

This commit is contained in:
ducklet 2023-02-04 17:55:22 +01:00
parent 60d38e9b49
commit 099770c80c
2 changed files with 62 additions and 3 deletions

View file

@@ -6,7 +6,7 @@ from urllib.parse import urljoin
from . import db
from .models import Movie, Rating, User
from .request import cache_path, session, soup_from_url
from .request import asession, asoup_from_url, cache_path
log = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ log = logging.getLogger(__name__)
async def refresh_user_ratings_from_imdb(stop_on_dupe: bool = True):
with session() as s:
async with asession() as s:
s.headers["Accept-Language"] = "en-US, en;q=0.5"
for user in await db.get_all(User):
@@ -152,7 +152,7 @@ ForgedRequest = namedtuple("ForgedRequest", "url headers")
async def parse_page(url: str) -> tuple[list[Rating], str | None]:
ratings = []
soup = soup_from_url(url)
soup = await asoup_from_url(url)
meta = soup.find("meta", property="pageId")
headline = soup.h1

View file

@@ -202,6 +202,56 @@ def _http_get(s: _Session_T, url: str, *args, **kwds) -> _Response_T:
return resp
@_throttle(1, 1, random)
async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T:
    """Perform a throttled async GET of *url* through session *s*.

    In debug mode (``config.debug``) responses are persisted to / served from
    an on-disk JSON cache keyed by the built request.  Any 3xx response —
    cached or live — raises ``_RedirectError`` instead of being followed,
    and the body is fully read before returning so ``resp.text`` is usable.

    Raises:
        _RedirectError: on any redirect response (``is_cached=True`` when it
            came from the debug cache).
        httpx-style HTTP error: via ``resp.raise_for_status()`` on 4xx/5xx.
    """
    # NOTE(review): keyword arguments before *args is fragile — any positional
    # in *args would collide with method/url; presumably callers only ever
    # pass keywords here. TODO confirm.
    req = s.build_request(method="GET", url=url, *args, **kwds)
    # Debug-only response cache; None disables both read and write below.
    cachefile = cache_path(req) if config.debug else None
    if cachefile:
        if cachefile.exists():
            log.debug(
                "💾 loading %s (%a) from cache %s ...", req.url, req.headers, cachefile
            )
            with cachefile.open() as fp:
                resp = _CachedResponse(**json.load(fp))
            # A cached redirect must surface exactly like a live one.
            if 300 <= resp.status_code <= 399:
                raise _RedirectError(
                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
                )
            return cast(_Response_T, resp)
    log.debug("⚡️ loading %s (%a) ...", req.url, req.headers)
    # Redirects are not followed so they can be detected and raised below.
    resp = await s.send(req, follow_redirects=False, stream=True)
    resp.raise_for_status()
    await resp.aread()  # Download the response stream to allow `resp.text` access.
    if cachefile:
        log.debug(
            "💾 writing response to cache: %s (%a) -> %s",
            req.url,
            req.headers,
            cachefile,
        )
        # Persist only the fields _CachedResponse is reconstructed from above.
        with cachefile.open("w") as fp:
            json.dump(
                {
                    "status_code": resp.status_code,
                    "text": resp.text,
                    "url": str(resp.url),
                    "headers": dict(resp.headers),
                },
                fp,
            )
    if resp.is_redirect:
        # Redirects could mean trouble, we need to stay on top of that!
        raise _RedirectError(from_url=str(resp.url), to_url=resp.headers["location"])
    return resp
def soup_from_url(url):
"""Return a BeautifulSoup instance from the contents for the given URL."""
with session() as s:
@@ -211,6 +261,15 @@ def soup_from_url(url):
return soup
async def asoup_from_url(url):
    """Asynchronously fetch *url* and parse its body into a BeautifulSoup tree."""
    async with asession() as client:
        response = await _ahttp_get(client, url)
        return bs4.BeautifulSoup(response.text, "html5lib")
def _last_modified_from_response(resp: _Response_T) -> float | None:
if last_mod := resp.headers.get("last-modified"):
try: