refactor loading of imdb ratings to yield loaded ratings
This commit is contained in:
parent
1ad7a79d33
commit
1805282d41
3 changed files with 60 additions and 34 deletions
|
|
@@ -4,11 +4,9 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from .db import close_connection_pool, get_all, open_connection_pool
|
from .db import close_connection_pool, open_connection_pool
|
||||||
from .imdb import load_imdb
|
from .imdb import refresh_user_ratings_from_imdb
|
||||||
from .imdb_import import import_from_file
|
from .imdb_import import import_from_file
|
||||||
from .models import User
|
|
||||||
from .request import session
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -16,12 +14,11 @@ log = logging.getLogger(__name__)
|
||||||
async def run_load_user_ratings_from_imdb():
|
async def run_load_user_ratings_from_imdb():
|
||||||
await open_connection_pool()
|
await open_connection_pool()
|
||||||
|
|
||||||
with session() as s:
|
i = 0
|
||||||
s.headers["Accept-Language"] = "en-GB, en;q=0.5"
|
async for rating in refresh_user_ratings_from_imdb():
|
||||||
|
i += 1
|
||||||
|
|
||||||
for user in await get_all(User):
|
log.info("✨ Imported %s new ratings.", i)
|
||||||
log.info("Loading data for %s ... ⚡️", user.name)
|
|
||||||
await load_imdb(user.imdb_id)
|
|
||||||
|
|
||||||
await close_connection_pool()
|
await close_connection_pool()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -2,12 +2,12 @@ import logging
|
||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from .db import add_or_update_movie, add_or_update_rating, add_or_update_user
|
from . import db
|
||||||
from .models import Movie, Rating, User
|
from .models import Movie, Rating, User
|
||||||
from .request import cache_path, soup_from_url
|
from .request import cache_path, session, soup_from_url
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -34,10 +34,32 @@ log = logging.getLogger(__name__)
|
||||||
# p.text-muted.text ("Rated on 06 May 2021")
|
# p.text-muted.text ("Rated on 06 May 2021")
|
||||||
|
|
||||||
|
|
||||||
def imdb_url(user_id):
|
async def refresh_user_ratings_from_imdb(stop_on_dupe=True):
|
||||||
|
|
||||||
|
with session() as s:
|
||||||
|
s.headers["Accept-Language"] = "en-GB, en;q=0.5"
|
||||||
|
|
||||||
|
for user in await db.get_all(User):
|
||||||
|
|
||||||
|
log.info("⚡️ Loading data for %s ...", user.name)
|
||||||
|
|
||||||
|
async for rating, is_updated in load_ratings(user.imdb_id):
|
||||||
|
assert rating.user == user
|
||||||
|
|
||||||
|
if stop_on_dupe and not is_updated:
|
||||||
|
break
|
||||||
|
|
||||||
|
yield rating
|
||||||
|
|
||||||
|
|
||||||
|
def user_ratings_url(user_id):
|
||||||
return f"https://www.imdb.com/user/{user_id}/ratings"
|
return f"https://www.imdb.com/user/{user_id}/ratings"
|
||||||
|
|
||||||
|
|
||||||
|
def movie_url(imdb_id: str):
|
||||||
|
return f"https://www.imdb.com/title/{imdb_id}/"
|
||||||
|
|
||||||
|
|
||||||
def imdb_rating_from_score(score: int) -> float:
|
def imdb_rating_from_score(score: int) -> float:
|
||||||
"""Return the IMDb rating from an Unwind Movie score."""
|
"""Return the IMDb rating from an Unwind Movie score."""
|
||||||
assert 0 <= score <= 100
|
assert 0 <= score <= 100
|
||||||
|
|
@@ -122,7 +144,9 @@ def movie_and_rating_from_item(item) -> tuple[Movie, Rating]:
|
||||||
ForgedRequest = namedtuple("ForgedRequest", "url headers")
|
ForgedRequest = namedtuple("ForgedRequest", "url headers")
|
||||||
|
|
||||||
|
|
||||||
async def parse_page(url, stop_on_dupe=True) -> Optional[str]:
|
async def parse_page(url) -> Tuple[list[Rating], Optional[str]]:
|
||||||
|
ratings = []
|
||||||
|
|
||||||
soup = soup_from_url(url)
|
soup = soup_from_url(url)
|
||||||
|
|
||||||
meta = soup.find("meta", property="pageId")
|
meta = soup.find("meta", property="pageId")
|
||||||
|
|
@@ -131,7 +155,6 @@ async def parse_page(url, stop_on_dupe=True) -> Optional[str]:
|
||||||
user = User(imdb_id=meta["content"], name="")
|
user = User(imdb_id=meta["content"], name="")
|
||||||
if match := find_name(headline.string):
|
if match := find_name(headline.string):
|
||||||
user.name = match["name"]
|
user.name = match["name"]
|
||||||
await add_or_update_user(user)
|
|
||||||
|
|
||||||
items = soup.find_all("div", "lister-item-content")
|
items = soup.find_all("div", "lister-item-content")
|
||||||
for i, item in enumerate(items):
|
for i, item in enumerate(items):
|
||||||
|
|
@@ -149,25 +172,35 @@ async def parse_page(url, stop_on_dupe=True) -> Optional[str]:
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
await add_or_update_movie(movie)
|
rating.user = user
|
||||||
|
rating.movie = movie
|
||||||
|
|
||||||
rating.user_id = user.id
|
ratings.append(rating)
|
||||||
rating.movie_id = movie.id # needs to be set _after_ movie has been updated
|
|
||||||
is_updated = await add_or_update_rating(rating)
|
|
||||||
|
|
||||||
if stop_on_dupe and not is_updated:
|
|
||||||
log.info("Import stopped after %s items. Caught up to known state. ✋", i)
|
|
||||||
return None
|
|
||||||
|
|
||||||
footer = soup.find("div", "footer")
|
footer = soup.find("div", "footer")
|
||||||
assert footer is not None
|
assert footer is not None
|
||||||
next_url = urljoin(url, footer.find(string=re.compile(r"Next")).parent["href"])
|
next_url = urljoin(url, footer.find(string=re.compile(r"Next")).parent["href"])
|
||||||
|
|
||||||
return next_url if url != next_url else None
|
return (ratings, next_url if url != next_url else None)
|
||||||
|
|
||||||
|
|
||||||
async def load_imdb(user_id):
|
async def load_ratings(user_id):
|
||||||
next_url = imdb_url(user_id)
|
next_url = user_ratings_url(user_id)
|
||||||
|
|
||||||
while next_url := await parse_page(next_url):
|
while next_url:
|
||||||
pass
|
|
||||||
|
ratings, next_url = await parse_page(next_url)
|
||||||
|
|
||||||
|
for i, rating in enumerate(ratings):
|
||||||
|
|
||||||
|
if i == 0:
|
||||||
|
# All rating objects share the same user.
|
||||||
|
await db.add_or_update_user(rating.user)
|
||||||
|
rating.user_id = rating.user.id
|
||||||
|
|
||||||
|
await db.add_or_update_movie(rating.movie)
|
||||||
|
rating.movie_id = rating.movie.id
|
||||||
|
|
||||||
|
is_updated = await db.add_or_update_rating(rating)
|
||||||
|
|
||||||
|
yield rating, is_updated
|
||||||
|
|
|
||||||
|
|
@@ -18,7 +18,7 @@ from starlette.middleware.authentication import AuthenticationMiddleware
|
||||||
from starlette.responses import JSONResponse
|
from starlette.responses import JSONResponse
|
||||||
from starlette.routing import Mount, Route
|
from starlette.routing import Mount, Route
|
||||||
|
|
||||||
from . import config, db
|
from . import config, db, imdb
|
||||||
from .db import close_connection_pool, find_ratings, open_connection_pool
|
from .db import close_connection_pool, find_ratings, open_connection_pool
|
||||||
from .middleware.responsetime import ResponseTimeMiddleware
|
from .middleware.responsetime import ResponseTimeMiddleware
|
||||||
from .models import Group, Movie, User, asplain
|
from .models import Group, Movie, User, asplain
|
||||||
|
|
@@ -64,10 +64,6 @@ class BearerAuthBackend(AuthenticationBackend):
|
||||||
return AuthCredentials(["authenticated", *roles]), user
|
return AuthCredentials(["authenticated", *roles]), user
|
||||||
|
|
||||||
|
|
||||||
def imdb_url(imdb_id: str):
|
|
||||||
return f"https://www.imdb.com/title/{imdb_id}/"
|
|
||||||
|
|
||||||
|
|
||||||
def truthy(s: str):
|
def truthy(s: str):
|
||||||
return bool(s) and s.lower() in {"1", "yes", "true"}
|
return bool(s) and s.lower() in {"1", "yes", "true"}
|
||||||
|
|
||||||
|
|
@@ -153,7 +149,7 @@ async def get_ratings_for_group(request):
|
||||||
"canonical_title": r["canonical_title"],
|
"canonical_title": r["canonical_title"],
|
||||||
"original_title": r["original_title"],
|
"original_title": r["original_title"],
|
||||||
"year": r["release_year"],
|
"year": r["release_year"],
|
||||||
"link": imdb_url(r["movie_imdb_id"]),
|
"link": imdb.movie_url(r["movie_imdb_id"]),
|
||||||
"user_scores": [],
|
"user_scores": [],
|
||||||
"imdb_score": r["imdb_score"],
|
"imdb_score": r["imdb_score"],
|
||||||
"media_type": r["media_type"],
|
"media_type": r["media_type"],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue