add IMDb vote count to movies
This commit is contained in:
parent
af25d9c5a2
commit
8d20cc040e
6 changed files with 51 additions and 11 deletions
|
|
@ -379,7 +379,7 @@ async def find_ratings(
|
|||
FROM {Rating._table}
|
||||
LEFT JOIN {Movie._table} ON {Movie._table}.id={Rating._table}.movie_id
|
||||
WHERE {user_condition}{(' AND ' + ' AND '.join(conditions)) if conditions else ''}
|
||||
ORDER BY length({Movie._table}.title) ASC, {Rating._table}.rating_date DESC, {Movie._table}.score DESC
|
||||
ORDER BY length({Movie._table}.title) ASC, {Rating._table}.rating_date DESC, {Movie._table}.imdb_score DESC
|
||||
LIMIT :limit_rows
|
||||
)"""
|
||||
]
|
||||
|
|
@ -393,7 +393,7 @@ async def find_ratings(
|
|||
FROM {Movie._table}
|
||||
WHERE id NOT IN newest_movies
|
||||
{('AND ' + ' AND '.join(conditions)) if conditions else ''}
|
||||
ORDER BY length(title) ASC, score DESC, release_year DESC
|
||||
ORDER BY length(title) ASC, imdb_score DESC, release_year DESC
|
||||
LIMIT :limit_rows
|
||||
)""",
|
||||
f"""{source_table} AS (
|
||||
|
|
@ -412,7 +412,8 @@ async def find_ratings(
|
|||
SELECT
|
||||
{Rating._table}.score AS user_score,
|
||||
{Rating._table}.user_id AS user_id,
|
||||
{Movie._table}.score AS imdb_score,
|
||||
{Movie._table}.imdb_score,
|
||||
{Movie._table}.imdb_votes,
|
||||
{Movie._table}.imdb_id AS movie_imdb_id,
|
||||
{Movie._table}.media_type AS media_type,
|
||||
{Movie._table}.title AS canonical_title,
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ def movie_and_rating_from_item(item) -> tuple[Movie, Rating]:
|
|||
rating.score = score_from_imdb_rating(float(rating_item.string))
|
||||
if match := ratings_item.find("div", "ipl-rating-star small"):
|
||||
if rating_item := match.find("span", "ipl-rating-star__rating"):
|
||||
movie.score = score_from_imdb_rating(float(rating_item.string))
|
||||
movie.imdb_score = score_from_imdb_rating(float(rating_item.string))
|
||||
|
||||
return movie, rating
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class BasicRow:
|
|||
release_year=self.startYear,
|
||||
media_type=title_types[self.titleType],
|
||||
imdb_id=self.tconst,
|
||||
score=None,
|
||||
imdb_score=None,
|
||||
runtime=self.runtimeMinutes,
|
||||
genres=self.genres or set(),
|
||||
updated=None, # optimization: skip default factory
|
||||
|
|
@ -79,7 +79,8 @@ class RatingRow:
|
|||
def as_movie(self):
|
||||
return Movie(
|
||||
imdb_id=self.tconst,
|
||||
score=score_from_imdb_rating(self.averageRating),
|
||||
imdb_score=score_from_imdb_rating(self.averageRating),
|
||||
imdb_votes=self.numVotes,
|
||||
updated=None, # optimization: skip default factory
|
||||
id=None, # optimization: skip default factory
|
||||
)
|
||||
|
|
@ -158,11 +159,11 @@ def read_ratings(path):
|
|||
yield m
|
||||
|
||||
|
||||
def read_ratings_as_scoremap(path):
|
||||
def read_ratings_as_mapping(path):
|
||||
"""Optimized function to quickly load all ratings."""
|
||||
rows = read_imdb_tsv(path, RatingRow, unpack=False)
|
||||
rows = cast(list[list[str]], rows)
|
||||
return {r[0]: round(100 * (float(r[1]) - 1) / 9) for r in rows}
|
||||
return {r[0]: (round(100 * (float(r[1]) - 1) / 9), int(r[2])) for r in rows}
|
||||
|
||||
|
||||
def read_basics(path):
|
||||
|
|
@ -181,7 +182,7 @@ def read_basics(path):
|
|||
|
||||
async def import_from_file(*, basics_path: Path, ratings_path: Path):
|
||||
log.info("💾 Loading scores ...")
|
||||
scores = read_ratings_as_scoremap(ratings_path)
|
||||
ratings = read_ratings_as_mapping(ratings_path)
|
||||
|
||||
log.info("💾 Importing movies ...")
|
||||
total = count_lines(basics_path)
|
||||
|
|
@ -212,7 +213,7 @@ async def import_from_file(*, basics_path: Path, ratings_path: Path):
|
|||
log.debug("Skipping movie, unwanted media type: %s", m.media_type)
|
||||
continue
|
||||
|
||||
m.score = scores.get(m.imdb_id)
|
||||
m.imdb_score, m.imdb_votes = ratings.get(m.imdb_id, [None, None])
|
||||
chunk.append(m)
|
||||
|
||||
if len(chunk) > 1000:
|
||||
|
|
|
|||
|
|
@ -152,7 +152,8 @@ class Movie:
|
|||
release_year: int = None # canonical release date
|
||||
media_type: str = None
|
||||
imdb_id: str = None
|
||||
score: Optional[int] = None # range: [0,100]
|
||||
imdb_score: Optional[int] = None # range: [0,100]
|
||||
imdb_votes: Optional[int] = None
|
||||
runtime: Optional[int] = None # minutes
|
||||
genres: set[str] = None
|
||||
updated: datetime = field(default_factory=utcnow)
|
||||
|
|
|
|||
36
unwind/sql/20210720-213416.sql
Normal file
36
unwind/sql/20210720-213416.sql
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
-- add IMDb vote count
|
||||
|
||||
CREATE TABLE _migrate_movies (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
original_title TEXT,
|
||||
release_year INTEGER NOT NULL,
|
||||
media_type TEXT NOT NULL,
|
||||
imdb_id TEXT NOT NULL UNIQUE,
|
||||
imdb_score INTEGER,
|
||||
imdb_votes INTEGER,
|
||||
runtime INTEGER,
|
||||
genres TEXT NOT NULL,
|
||||
updated TEXT NOT NULL
|
||||
);;
|
||||
|
||||
INSERT INTO _migrate_movies
|
||||
SELECT
|
||||
id,
|
||||
title,
|
||||
original_title,
|
||||
release_year,
|
||||
media_type,
|
||||
imdb_id,
|
||||
score AS imdb_score,
|
||||
NULL AS imdb_votes,
|
||||
runtime,
|
||||
genres,
|
||||
updated
|
||||
FROM movies
|
||||
WHERE true;;
|
||||
|
||||
DROP TABLE movies;;
|
||||
|
||||
ALTER TABLE _migrate_movies
|
||||
RENAME TO movies;;
|
||||
|
|
@ -154,6 +154,7 @@ async def get_ratings_for_group(request):
|
|||
"link": imdb.movie_url(r["movie_imdb_id"]),
|
||||
"user_scores": [],
|
||||
"imdb_score": r["imdb_score"],
|
||||
"imdb_votes": r["imdb_votes"],
|
||||
"media_type": r["media_type"],
|
||||
},
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue