use async requests to refresh user ratings
parent 60d38e9b49
commit 099770c80c

2 changed files with 62 additions and 3 deletions
@@ -6,7 +6,7 @@ from urllib.parse import urljoin
 from . import db
 from .models import Movie, Rating, User
-from .request import cache_path, session, soup_from_url
+from .request import asession, asoup_from_url, cache_path
 
 log = logging.getLogger(__name__)
 
 
@@ -35,7 +35,7 @@ log = logging.getLogger(__name__)
 
 
 async def refresh_user_ratings_from_imdb(stop_on_dupe: bool = True):
-    with session() as s:
+    async with asession() as s:
         s.headers["Accept-Language"] = "en-US, en;q=0.5"
 
         for user in await db.get_all(User):
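
With the refresh path now async end to end, callers have to drive it from an event loop instead of calling it directly. A minimal sketch of invoking it (the app.imdb import path is hypothetical, not shown in this diff):

    import asyncio

    from app.imdb import refresh_user_ratings_from_imdb  # hypothetical import path

    asyncio.run(refresh_user_ratings_from_imdb(stop_on_dupe=True))
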
@@ -152,7 +152,7 @@ ForgedRequest = namedtuple("ForgedRequest", "url headers")
 async def parse_page(url: str) -> tuple[list[Rating], str | None]:
     ratings = []
 
-    soup = soup_from_url(url)
+    soup = await asoup_from_url(url)
 
     meta = soup.find("meta", property="pageId")
     headline = soup.h1
@@ -202,6 +202,56 @@ def _http_get(s: _Session_T, url: str, *args, **kwds) -> _Response_T:
     return resp
 
 
+@_throttle(1, 1, random)
+async def _ahttp_get(s: _ASession_T, url: str, *args, **kwds) -> _Response_T:
+    req = s.build_request(method="GET", url=url, *args, **kwds)
+
+    cachefile = cache_path(req) if config.debug else None
+
+    if cachefile:
+        if cachefile.exists():
+            log.debug(
+                "💾 loading %s (%a) from cache %s ...", req.url, req.headers, cachefile
+            )
+            with cachefile.open() as fp:
+                resp = _CachedResponse(**json.load(fp))
+            if 300 <= resp.status_code <= 399:
+                raise _RedirectError(
+                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
+                )
+            return cast(_Response_T, resp)
+
+    log.debug("⚡️ loading %s (%a) ...", req.url, req.headers)
+    resp = await s.send(req, follow_redirects=False, stream=True)
+    resp.raise_for_status()
+
+    await resp.aread()  # Download the response stream to allow `resp.text` access.
+
+    if cachefile:
+        log.debug(
+            "💾 writing response to cache: %s (%a) -> %s",
+            req.url,
+            req.headers,
+            cachefile,
+        )
+        with cachefile.open("w") as fp:
+            json.dump(
+                {
+                    "status_code": resp.status_code,
+                    "text": resp.text,
+                    "url": str(resp.url),
+                    "headers": dict(resp.headers),
+                },
+                fp,
+            )
+
+    if resp.is_redirect:
+        # Redirects could mean trouble, we need to stay on top of that!
+        raise _RedirectError(from_url=str(resp.url), to_url=resp.headers["location"])
+
+    return resp
+
+
 def soup_from_url(url):
     """Return a BeautifulSoup instance from the contents for the given URL."""
     with session() as s:
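
The cache branch above rebuilds a response object from exactly the keys that json.dump writes, so _CachedResponse (defined elsewhere in the request module, not shown in this diff) is presumably a thin stand-in with those four fields. A sketch of one possible shape, offered purely as an assumption:

    from dataclasses import dataclass, field

    @dataclass
    class _CachedResponse:
        # Assumed shape: mirrors the JSON payload written by _ahttp_get.
        status_code: int
        text: str
        url: str
        headers: dict[str, str] = field(default_factory=dict)
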
@@ -211,6 +261,15 @@ def soup_from_url(url):
     return soup
 
 
+async def asoup_from_url(url):
+    """Return a BeautifulSoup instance from the contents for the given URL."""
+    async with asession() as s:
+        r = await _ahttp_get(s, url)
+
+    soup = bs4.BeautifulSoup(r.text, "html5lib")
+    return soup
+
+
 def _last_modified_from_response(resp: _Response_T) -> float | None:
     if last_mod := resp.headers.get("last-modified"):
         try:
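
Since asoup_from_url is a coroutine, a quick way to exercise it outside the app is asyncio.run; a minimal sketch (the import path and URL are illustrative examples, not taken from this diff):

    import asyncio

    from app.request import asoup_from_url  # hypothetical import path

    soup = asyncio.run(asoup_from_url("https://www.imdb.com/title/tt0133093/"))
    print(soup.h1)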