import bz2
import json
from pathlib import Path
from unittest.mock import AsyncMock

import bs4
import pytest

from unwind import imdb
from unwind.imdb import imdb_rating_from_score, score_from_imdb_rating

testsdir = Path(__file__).parent
fixturesdir = testsdir / "fixtures"


@pytest.mark.parametrize("rating", (x / 10 for x in range(10, 101)))
def test_rating_conversion(rating: float):
    assert rating == imdb_rating_from_score(score_from_imdb_rating(rating))


@pytest.mark.parametrize("score", range(0, 101))
def test_score_conversion(score: int):
    # Our score covers 101 discrete values while IMDb's rating covers only 91,
    # so the mapping is not injective: 10 score values cannot be mapped back
    # uniquely.
    non_injective = set(range(5, 100, 10))
    if score in non_injective:
        pytest.skip(f"Score cannot be mapped back correctly: {score}")
    assert score == score_from_imdb_rating(imdb_rating_from_score(score))


@pytest.mark.parametrize(
    "fixture",
    (
        "most_popular_100.html.bz2",
        "most_popular_100-20240714.html.bz2",
    ),
)
@pytest.mark.asyncio
async def test_load_most_popular_100(monkeypatch, fixture: str):
    with bz2.open(fixturesdir / fixture, "rb") as f:
        html = f.read()
    soup = bs4.BeautifulSoup(html, "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=soup))
    movie_ids = await imdb.load_most_popular_100()
    assert len(set(movie_ids)) == 100
    assert all(id_.startswith("tt") for id_ in movie_ids)


@pytest.mark.parametrize(
    "fixture",
    (
        "bottom_100.html.bz2",
        "bottom_100-20240714.html.bz2",
    ),
)
@pytest.mark.asyncio
async def test_load_bottom_100(monkeypatch, fixture: str):
    with bz2.open(fixturesdir / fixture, "rb") as f:
        html = f.read()
    soup = bs4.BeautifulSoup(html, "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=soup))
    movie_ids = await imdb.load_bottom_100()
    assert len(set(movie_ids)) == 100
    assert all(id_.startswith("tt") for id_ in movie_ids)


@pytest.mark.asyncio
async def test_load_top_250(monkeypatch):
    with bz2.open(fixturesdir / "top250.gql.json.bz2", "rb") as f:
        jsonstr = f.read()
    monkeypatch.setattr(imdb, "adownload", AsyncMock(return_value=jsonstr))
    movie_ids = await imdb.load_top_250()
    assert len(movie_ids) == 250
    assert all(id_.startswith("tt") for id_ in movie_ids)


@pytest.mark.asyncio
async def test_load_ratings_page(monkeypatch):
    with bz2.open(fixturesdir / "ratings-ur655321.html.bz2", "rb") as f:
        html = f.read()
    soup = bs4.BeautifulSoup(html, "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=soup))
    page = await imdb._load_ratings_page("fakeurl", "ur655321")
    assert len(page.ratings) == 100
    assert page.imdb_user_id is not None
    assert page.imdb_user_id == "ur655321"
    assert page.imdb_user_name == "AlexUltra"
    assert page.next_page_url is not None
    assert page.next_page_url.startswith("/user/ur655321/ratings?")


def _mock_response(content: bytes):
    # Minimal response stub: raise_for_status() is a no-op and json() returns
    # the parsed payload.
    class MockResponse:
        def raise_for_status(self):
            pass

        def json(self):
            return json.loads(content)

    return MockResponse()


@pytest.mark.asyncio
async def test_load_ratings_page_20240510(monkeypatch):
    with bz2.open(fixturesdir / "ratings-ur655321-20240510.html.bz2", "rb") as f:
        html = f.read()
    soup = bs4.BeautifulSoup(html, "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=soup))

    # The redesigned ratings page loads its rating data via a GraphQL POST, so
    # patch the session's post() to return the recorded GraphQL response.
    with bz2.open(fixturesdir / "ratings-ur655321-20240510.gql.json.bz2", "rb") as f:
        jsonstr = f.read()
    async with imdb.asession() as s:
        monkeypatch.setattr(s, "post", AsyncMock(return_value=_mock_response(jsonstr)))
        page = await imdb._load_ratings_page("fakeurl", "ur655321")
"ur655321") assert len(page.ratings) == 100 assert page.imdb_user_id is not None assert page.imdb_user_id == "ur655321" assert page.imdb_user_name == "AlexUltra" assert page.next_page_url is None, "not supported for new ratings page" def movie(item: dict): for rating in page.ratings: assert rating.movie if rating.movie.imdb_id == item["imdb_id"]: rating_dict = {key: getattr(rating.movie, key) for key in item.keys()} return rating_dict raise AssertionError(f"{item['imdb_id']} not found in page.ratings") a_movie = { "title": "Kung Fu Panda 4", "release_year": 2024, "media_type": "Movie", "imdb_id": "tt21692408", "imdb_score": 59, "imdb_votes": 36069, "runtime": 94, "genres": {"Action", "Adventure", "Animation"}, } assert a_movie == movie(a_movie) a_running_tvseries = { "title": "Palm Royale", "release_year": 2024, "media_type": "TV Series", "imdb_id": "tt8888540", "imdb_score": 64, "imdb_votes": 6044, "genres": {"Drama"}, } assert a_running_tvseries == movie(a_running_tvseries) a_finished_tvseries = { "title": "Fawlty Towers", "release_year": 1975, "media_type": "TV Series", "imdb_id": "tt0072500", "imdb_score": 87, "imdb_votes": 100261, "genres": {"Comedy"}, } assert a_finished_tvseries == movie(a_finished_tvseries) a_tvepisode = { "title": "Columbo / No Time to Die", "original_title": "Columbo / No Time to Die", "release_year": 1992, "media_type": "TV Episode", "imdb_id": "tt0103987", "imdb_score": 59, "imdb_votes": 2122, "runtime": 98, "genres": {"Crime", "Drama", "Mystery"}, } assert a_tvepisode == movie(a_tvepisode) a_videogame = { "title": "Alan Wake", "original_title": "Alan Wake", "release_year": 2010, "media_type": "Video Game", "imdb_id": "tt0466662", # The data from __NEXT_DATA__ is wrong, the actual values should be: # "imdb_score": 82, # "imdb_votes": 7300, # "genres": {"Action", "Adventure", "Horror"}, "imdb_score": 67, # Wrong value, but correctly parsed from __NEXT_DATA__ "imdb_votes": 11655, # Wrong value, but correctly parsed from __NEXT_DATA__ "genres": {"Comedy", "Crime", "Drama"}, # Wrong value } assert a_videogame == movie(a_videogame) @pytest.mark.asyncio async def test_load_ratings_page_20240720(monkeypatch): with bz2.open(fixturesdir / "ratings-ur655321-20240720.html.bz2", "rb") as f: html = f.read() soup = bs4.BeautifulSoup(html, "html5lib") monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=soup)) with bz2.open(fixturesdir / "ratings-ur655321-20240720.gql.json.bz2", "rb") as f: jsonstr = f.read() async with imdb.asession() as s: monkeypatch.setattr(s, "post", AsyncMock(return_value=_mock_response(jsonstr))) page = await imdb._load_ratings_page("fakeurl", "ur655321") assert len(page.ratings) == 100 assert page.imdb_user_id is not None assert page.imdb_user_id == "ur655321" assert page.imdb_user_name == "AlexUltra" assert page.next_page_url is None, "not supported for new ratings page" def movie(item: dict): for rating in page.ratings: assert rating.movie if rating.movie.imdb_id == item["imdb_id"]: rating_dict = {key: getattr(rating.movie, key) for key in item.keys()} return rating_dict raise AssertionError(f"{item['imdb_id']} not found in page.ratings") a_movie = { "title": "Kung Fu Panda 4", "release_year": 2024, "media_type": "Movie", "imdb_id": "tt21692408", "imdb_score": 59, "imdb_votes": 48018, "runtime": 94, } assert a_movie == movie(a_movie) a_running_tvseries = { "title": "Palm Royale", "release_year": 2024, "media_type": "TV Series", "imdb_id": "tt8888540", "imdb_score": 63, "imdb_votes": 9458, } assert a_running_tvseries == 
    a_finished_tvseries = {
        "title": "Fawlty Towers",
        "release_year": 1975,
        "media_type": "TV Series",
        "imdb_id": "tt0072500",
        "imdb_score": 87,
        "imdb_votes": 100860,
    }
    assert a_finished_tvseries == movie(a_finished_tvseries)
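

# Illustrative sketch (not part of the test suite, and not the actual
# implementation in unwind.imdb): one linear mapping between the 0..100 score
# and IMDb's 1.0..10.0 rating that would be consistent with the round-trip
# tests above. The _reference_* names are hypothetical and exist only to
# illustrate why all 101 score values cannot survive a round trip through the
# 91 possible rating values.


def _reference_score_from_rating(rating: float) -> int:
    # Map 1.0..10.0 linearly onto 0..100 and round to the nearest integer.
    return round((rating - 1) * 100 / 9)


def _reference_rating_from_score(score: int) -> float:
    # Map 0..100 linearly onto 1.0..10.0 and round to one decimal place.
    # Scores 5, 15, ..., 95 land exactly halfway between two tenths, so after
    # rounding they collide with a neighbouring score; these are the 10 values
    # that test_score_conversion skips.
    return round(1 + score * 9 / 100, 1)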