2024-05-10 00:12:25 +02:00
|
|
|
import bz2
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from unittest.mock import AsyncMock
|
|
|
|
|
|
|
|
|
|
import bs4
|
2021-06-21 18:54:03 +02:00
|
|
|
import pytest
|
2023-02-04 18:15:14 +01:00
|
|
|
|
2024-05-10 00:12:25 +02:00
|
|
|
from unwind import imdb
|
2021-06-21 18:54:03 +02:00
|
|
|
from unwind.imdb import imdb_rating_from_score, score_from_imdb_rating
|
|
|
|
|
|
2024-05-10 00:12:25 +02:00
|
|
|
# Directory containing this test module.
testsdir = Path(__file__).parent
# Compressed HTML/JSON fixtures used to stub out network access in the tests below.
fixturesdir = testsdir / "fixtures"
|
|
|
|
|
|
2021-06-21 18:54:03 +02:00
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("rating", (x / 10 for x in range(10, 101)))
def test_rating_conversion(rating: float):
    """Every IMDb rating (1.0-10.0 in 0.1 steps) survives a round trip through the score scale."""
    roundtripped = imdb_rating_from_score(score_from_imdb_rating(rating))
    assert roundtripped == rating
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("score", range(0, 101))
def test_score_conversion(score: int):
    """Scores survive a round trip through the IMDb rating scale, barring ambiguous ones."""
    # Our score covers 101 discrete values but IMDb's rating only 91, so the
    # mapping is non-injective: ten scores (5, 15, ..., 95) collide with a
    # neighbour and cannot be recovered uniquely.
    ambiguous_scores = set(range(5, 100, 10))
    if score in ambiguous_scores:
        pytest.skip(f"Score cannot be mapped back correctly: {score}")

    roundtripped = score_from_imdb_rating(imdb_rating_from_score(score))
    assert roundtripped == score
|
2024-05-10 00:12:25 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_most_popular_100(monkeypatch):
    """load_most_popular_100 yields 100 IMDb title IDs from the popular-movies chart."""
    # Serve the pre-downloaded fixture page instead of hitting imdb.com.
    with bz2.open(fixturesdir / "most_popular_100.html.bz2", "rb") as fh:
        page = bs4.BeautifulSoup(fh.read(), "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=page))

    ids = await imdb.load_most_popular_100()

    assert len(ids) == 100
    # IMDb title identifiers all carry the "tt" prefix.
    assert all(movie_id.startswith("tt") for movie_id in ids)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_bottom_100(monkeypatch):
    """load_bottom_100 yields 100 IMDb title IDs from the bottom-rated chart."""
    # Serve the pre-downloaded fixture page instead of hitting imdb.com.
    with bz2.open(fixturesdir / "bottom_100.html.bz2", "rb") as fh:
        page = bs4.BeautifulSoup(fh.read(), "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=page))

    ids = await imdb.load_bottom_100()

    assert len(ids) == 100
    # IMDb title identifiers all carry the "tt" prefix.
    assert all(movie_id.startswith("tt") for movie_id in ids)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_top_250(monkeypatch):
    """load_top_250 yields 250 IMDb title IDs from the top-250 GraphQL response."""
    # Serve the pre-downloaded GraphQL payload instead of hitting imdb.com.
    with bz2.open(fixturesdir / "top250.gql.json.bz2", "rb") as fh:
        payload = fh.read()
    monkeypatch.setattr(imdb, "adownload", AsyncMock(return_value=payload))

    ids = await imdb.load_top_250()

    assert len(ids) == 250
    # IMDb title identifiers all carry the "tt" prefix.
    assert all(movie_id.startswith("tt") for movie_id in ids)
|
2024-05-10 00:13:32 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_load_ratings_page(monkeypatch):
    """load_ratings_page extracts ratings, user metadata, and pagination from a ratings page."""
    # Serve the pre-downloaded fixture page instead of hitting imdb.com; the
    # URL argument is therefore irrelevant.
    with bz2.open(fixturesdir / "ratings-ur655321.html.bz2", "rb") as fh:
        page_soup = bs4.BeautifulSoup(fh.read(), "html5lib")
    monkeypatch.setattr(imdb, "asoup_from_url", AsyncMock(return_value=page_soup))

    page = await imdb.load_ratings_page("fakeurl")

    assert len(page.ratings) == 100
    assert page.imdb_user_id is not None
    assert page.imdb_user_id == "ur655321"
    assert page.imdb_user_name == "AlexUltra"
    assert page.next_page_url is not None
    assert page.next_page_url.startswith("/user/ur655321/ratings?")
|