The previous version of the page had all 100 movies rendered into the HTML. The new version has only the top 25 rendered into the HTML, but the whole list is available as LD+JSON data. Since we can easily support both versions, we don't remove the old parser (yet).
191 lines
5.8 KiB
Python
191 lines
5.8 KiB
Python
import bz2
|
|
import json
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock
|
|
|
|
import bs4
|
|
import pytest
|
|
|
|
from unwind import imdb
|
|
from unwind.imdb import imdb_rating_from_score, score_from_imdb_rating
|
|
|
|
# Directory containing this test module.
testsdir = Path(__file__).parents[0]
# Directory holding the bz2-compressed fixture files read by the tests.
fixturesdir = testsdir.joinpath("fixtures")
|
|
|
|
|
|
@pytest.mark.parametrize("rating", (tenths / 10 for tenths in range(10, 101)))
def test_rating_conversion(rating: float):
    """Check that a rating survives the round trip through our score.

    Parametrized over every rating from 1.0 to 10.0 in steps of 0.1.
    """
    score = score_from_imdb_rating(rating)
    roundtripped = imdb_rating_from_score(score)
    assert rating == roundtripped
|
|
|
|
|
|
@pytest.mark.parametrize("score", range(101))
def test_score_conversion(score: int):
    """Check that a score survives the round trip through an IMDb rating.

    Scores that cannot be mapped back uniquely are skipped.
    """
    # Because our score covers 101 discrete values and IMDb's rating only 91
    # discrete values, the mapping is non-injective, i.e. 10 values can't be
    # mapped uniquely. Those are the scores 5, 15, ..., 95.
    if score in range(5, 100, 10):
        pytest.skip(f"Score cannot be mapped back correctly: {score}")

    rating = imdb_rating_from_score(score)
    assert score == score_from_imdb_rating(rating)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "fixture",
    (
        "most_popular_100.html.bz2",
        "most_popular_100-20240714.html.bz2",
    ),
)
@pytest.mark.asyncio
async def test_load_most_popular_100(monkeypatch, fixture: str):
    """Load the "most popular 100" list from each stored page snapshot.

    The page fetch is replaced by a mock returning the parsed fixture, so no
    network access happens. Every snapshot must yield 100 distinct IDs, each
    starting with "tt".
    """
    with bz2.open(fixturesdir / fixture, "rb") as archive:
        page_soup = bs4.BeautifulSoup(archive.read(), "html5lib")

    fake_fetch = AsyncMock(return_value=page_soup)
    monkeypatch.setattr(imdb, "asoup_from_url", fake_fetch)

    movie_ids = await imdb.load_most_popular_100()
    unique_ids = set(movie_ids)
    assert len(unique_ids) == 100
    assert all(id_.startswith("tt") for id_ in movie_ids)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "fixture",
    (
        "bottom_100.html.bz2",
        "bottom_100-20240714.html.bz2",
    ),
)
@pytest.mark.asyncio
async def test_load_bottom_100(monkeypatch, fixture: str):
    """Load the "bottom 100" list from each stored page snapshot.

    The page fetch is replaced by a mock returning the parsed fixture, so no
    network access happens. Every snapshot must yield 100 distinct IDs, each
    starting with "tt".
    """
    with bz2.open(fixturesdir / fixture, "rb") as archive:
        page_soup = bs4.BeautifulSoup(archive.read(), "html5lib")

    fake_fetch = AsyncMock(return_value=page_soup)
    monkeypatch.setattr(imdb, "asoup_from_url", fake_fetch)

    movie_ids = await imdb.load_bottom_100()
    unique_ids = set(movie_ids)
    assert len(unique_ids) == 100
    assert all(id_.startswith("tt") for id_ in movie_ids)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_top_250(monkeypatch):
    """Load the top 250 list from a stored JSON response.

    The download function is replaced by a mock returning the decompressed
    fixture bytes. The result must hold 250 IDs, each starting with "tt".
    """
    with bz2.open(fixturesdir / "top250.gql.json.bz2", "rb") as archive:
        payload = archive.read()

    fake_download = AsyncMock(return_value=payload)
    monkeypatch.setattr(imdb, "adownload", fake_download)

    movie_ids = await imdb.load_top_250()
    assert len(movie_ids) == 250
    assert all(id_.startswith("tt") for id_ in movie_ids)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_ratings_page(monkeypatch):
    """Parse a stored ratings page for user ur655321.

    The page fetch is replaced by a mock returning the parsed fixture. The
    parsed page must carry 100 ratings, the user's ID and name, and a link to
    the next page.
    """
    with bz2.open(fixturesdir / "ratings-ur655321.html.bz2", "rb") as archive:
        page_soup = bs4.BeautifulSoup(archive.read(), "html5lib")

    fake_fetch = AsyncMock(return_value=page_soup)
    monkeypatch.setattr(imdb, "asoup_from_url", fake_fetch)

    page = await imdb._load_ratings_page("fakeurl", "ur655321")
    assert len(page.ratings) == 100

    # User identity as found on the page.
    assert page.imdb_user_id is not None
    assert page.imdb_user_id == "ur655321"
    assert page.imdb_user_name == "AlexUltra"

    # Pagination link to the following page of ratings.
    assert page.next_page_url is not None
    assert page.next_page_url.startswith("/user/ur655321/ratings?")
|
|
|
|
|
|
def _mock_response(content: bytes):
|
|
class MockResponse:
|
|
def raise_for_status(self):
|
|
pass
|
|
|
|
def json(self):
|
|
return json.loads(content)
|
|
|
|
return MockResponse()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_ratings_page_20240510(monkeypatch):
    """Parse the ratings page snapshot taken on 2024-05-10 for user ur655321.

    Two fixtures feed this test: the HTML page, served through a mocked
    `asoup_from_url`, and a JSON payload, served through a mocked `post` on
    the session. Besides the page-level fields, a handful of individual
    entries of different media types are checked attribute by attribute.
    """
    html_fixture = fixturesdir / "ratings-ur655321-20240510.html.bz2"
    with bz2.open(html_fixture, "rb") as archive:
        page_soup = bs4.BeautifulSoup(archive.read(), "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=page_soup))

    json_fixture = fixturesdir / "ratings-ur655321-20240510.gql.json.bz2"
    with bz2.open(json_fixture, "rb") as archive:
        gql_payload = archive.read()

    async with imdb.asession() as session:
        fake_post = AsyncMock(return_value=_mock_response(gql_payload))
        monkeypatch.setattr(session, "post", fake_post)
        page = await imdb._load_ratings_page("fakeurl", "ur655321")

    assert len(page.ratings) == 100
    assert page.imdb_user_id is not None
    assert page.imdb_user_id == "ur655321"
    assert page.imdb_user_name == "AlexUltra"
    assert page.next_page_url is None, "not supported for new ratings page"

    def movie(item: dict):
        """Return the attributes named in `item` from the matching rated movie.

        The match is made on `imdb_id`; an AssertionError is raised when no
        rating on the page refers to that ID.
        """
        for rating in page.ratings:
            rated = rating.movie
            assert rated
            if rated.imdb_id != item["imdb_id"]:
                continue
            return {field: getattr(rated, field) for field in item}
        raise AssertionError()

    a_movie = {
        "title": "Kung Fu Panda 4",
        "release_year": 2024,
        "media_type": "Movie",
        "imdb_id": "tt21692408",
        "imdb_score": 59,
        "imdb_votes": 36000,
        "runtime": 94,
    }
    assert a_movie == movie(a_movie)

    a_running_tvseries = {
        "title": "Palm Royale",
        "release_year": 2024,
        "media_type": "TV Series",
        "imdb_id": "tt8888540",
        "imdb_score": 64,
        "imdb_votes": 6000,
    }
    assert a_running_tvseries == movie(a_running_tvseries)

    a_finished_tvseries = {
        "title": "Fawlty Towers",
        "release_year": 1975,
        "media_type": "TV Series",
        "imdb_id": "tt0072500",
        "imdb_score": 87,
        "imdb_votes": 100000,
    }
    assert a_finished_tvseries == movie(a_finished_tvseries)

    a_tvepisode = {
        "title": "Columbo / No Time to Die",
        "original_title": None,
        "release_year": 1992,
        "media_type": "TV Episode",
        "imdb_id": "tt0103987",
        "imdb_score": 59,
        "imdb_votes": 2100,
        "runtime": 98,
    }
    assert a_tvepisode == movie(a_tvepisode)

    a_videogame = {
        "title": "Alan Wake",
        "original_title": None,
        "release_year": 2010,
        "media_type": "Video Game",
        "imdb_id": "tt0466662",
        "imdb_score": 82,
        "imdb_votes": 7300,
    }
    assert a_videogame == movie(a_videogame)
|