From b5cb22822e502d1e0358e09665414d1c1403bff9 Mon Sep 17 00:00:00 2001 From: ducklet Date: Tue, 15 Jun 2021 19:09:21 +0200 Subject: [PATCH] init with some kind of working prototype --- .gitignore | 4 + Dockerfile | 19 +++ README.md | 6 + poetry.lock | 378 +++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 23 +++ pyrightconfig.json | 5 + run | 15 ++ scripts/app | 5 + scripts/build | 7 + scripts/dev | 5 + scripts/lint | 8 + scripts/server | 5 + unwind/__init__.py | 1 + unwind/__main__.py | 36 +++++ unwind/config.py | 14 ++ unwind/db.py | 185 ++++++++++++++++++++++ unwind/imdb.py | 132 ++++++++++++++++ unwind/init.sql | 36 +++++ unwind/models.py | 139 +++++++++++++++++ unwind/request.py | 185 ++++++++++++++++++++++ unwind/types.py | 31 ++++ unwind/web.py | 53 +++++++ 22 files changed, 1292 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json create mode 100755 run create mode 100755 scripts/app create mode 100755 scripts/build create mode 100755 scripts/dev create mode 100755 scripts/lint create mode 100755 scripts/server create mode 100644 unwind/__init__.py create mode 100644 unwind/__main__.py create mode 100644 unwind/config.py create mode 100644 unwind/db.py create mode 100644 unwind/imdb.py create mode 100644 unwind/init.sql create mode 100644 unwind/models.py create mode 100644 unwind/request.py create mode 100644 unwind/types.py create mode 100644 unwind/web.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..30a091a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +/.cache +/data/* +/requirements.txt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4d2a169 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM docker.io/library/python:3.9-alpine + +RUN apk update --no-cache \ && apk upgrade --no-cache \ && pip install --no-cache-dir --upgrade pip + +RUN addgroup -g 10001 py \ && adduser -D -u 10000 -G py py + +WORKDIR /var/app + +COPY requirements.txt ./ + +RUN pip install --no-cache-dir --upgrade --requirement requirements.txt + +USER 10000:10001 + +ENTRYPOINT ["/var/app/run"] +CMD ["server"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..c1ba3cf --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# Unwind + +A cache & aggregator for user ratings from IMDb and other sources. + +Users are spread across many platforms, and most platforms allow only very limited access to their data. +This service aims to offer one interface for all of that data and open up access.
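The importer reads its list of IMDb accounts from a TOML file at `./data/config.toml` (overridable via the `UNWIND_CONFIG` environment variable; see `unwind/config.py` below). The `[imdb]` table maps an arbitrary display name to an IMDb user id, the `urNNNNNNNN` part of a profile URL. A minimal sketch of the expected shape, parsed with the same `toml` dependency the app uses; the ids are made-up placeholders:

```python
# Hypothetical data/config.toml contents, shown inline; the user ids are
# placeholders, not real accounts.
import toml

doc = toml.loads(
    """
    [imdb]
    alice = "ur00000001"
    bob = "ur00000002"
    """
)

# unwind/config.py exposes this table as `config.imdb`, and unwind/__main__.py
# walks it with `for name, imdb_id in config.imdb.items()`, scraping
# https://www.imdb.com/user/<imdb_id>/ratings for each entry.
assert doc["imdb"] == {"alice": "ur00000001", "bob": "ur00000002"}
```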
diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..31651e7 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,378 @@ +[[package]] +name = "aiosqlite" +version = "0.17.0" +description = "asyncio bridge to the standard sqlite3 module" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +typing_extensions = ">=3.7.2" + +[[package]] +name = "asgiref" +version = "3.3.4" +description = "ASGI specs, helper code, and adapters" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"] + +[[package]] +name = "beautifulsoup4" +version = "4.9.3" +description = "Screen-scraping library" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +soupsieve = {version = ">1.2", markers = "python_version >= \"3.0\""} + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "certifi" +version = "2020.12.5" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "chardet" +version = "4.0.0" +description = "Universal encoding detector for Python 2 and 3" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "click" +version = "8.0.1" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.4" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "databases" +version = "0.4.3" +description = "Async database support for Python." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +aiosqlite = {version = "*", optional = true, markers = "extra == \"sqlite\""} +sqlalchemy = "<1.4" + +[package.extras] +mysql = ["aiomysql"] +postgresql = ["asyncpg"] +postgresql_aiopg = ["aiopg"] +sqlite = ["aiosqlite"] + +[[package]] +name = "h11" +version = "0.12.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "html5lib" +version = "1.1" +description = "HTML parser based on the WHATWG HTML specification" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +six = ">=1.9" +webencodings = "*" + +[package.extras] +all = ["genshi", "chardet (>=2.2)", "lxml"] +chardet = ["chardet (>=2.2)"] +genshi = ["genshi"] +lxml = ["lxml"] + +[[package]] +name = "idna" +version = "2.10" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "requests" +version = "2.25.1" +description = "Python HTTP for Humans." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +certifi = ">=2017.4.17" +chardet = ">=3.0.2,<5" +idna = ">=2.5,<3" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "soupsieve" +version = "2.2.1" +description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "sqlalchemy" +version = "1.3.24" +description = "Database Abstraction Library" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.extras] +mssql = ["pyodbc"] +mssql_pymssql = ["pymssql"] +mssql_pyodbc = ["pyodbc"] +mysql = ["mysqlclient"] +oracle = ["cx-oracle"] +postgresql = ["psycopg2"] +postgresql_pg8000 = ["pg8000 (<1.16.6)"] +postgresql_psycopg2binary = ["psycopg2-binary"] +postgresql_psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql (<1)", "pymysql"] + +[[package]] +name = "starlette" +version = "0.14.2" +description = "The little ASGI library that shines." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +full = ["aiofiles", "graphene", "itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "typing-extensions" +version = "3.10.0.0" +description = "Backported and Experimental Type Hints for Python 3.5+" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "ulid-py" +version = "1.1.0" +description = "Universally Unique Lexicographically Sortable Identifier" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "urllib3" +version = "1.26.4" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +brotli = ["brotlipy (>=0.6.0)"] + +[[package]] +name = "uvicorn" +version = "0.14.0" +description = "The lightning-fast ASGI server." 
+category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +asgiref = ">=3.3.4" +click = ">=7" +h11 = ">=0.8" + +[package.extras] +standard = ["websockets (>=9.1)", "httptools (>=0.2.0,<0.3.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"] + +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "main" +optional = false +python-versions = "*" + +[metadata] +lock-version = "1.1" +python-versions = "^3.9" +content-hash = "28c14ec611e61db259fa6aa160df99308f7452874f69377a634d07cd379603c8" + +[metadata.files] +aiosqlite = [ + {file = "aiosqlite-0.17.0-py3-none-any.whl", hash = "sha256:6c49dc6d3405929b1d08eeccc72306d3677503cc5e5e43771efc1e00232e8231"}, + {file = "aiosqlite-0.17.0.tar.gz", hash = "sha256:f0e6acc24bc4864149267ac82fb46dfb3be4455f99fe21df82609cc6e6baee51"}, +] +asgiref = [ + {file = "asgiref-3.3.4-py3-none-any.whl", hash = "sha256:92906c611ce6c967347bbfea733f13d6313901d54dcca88195eaeb52b2a8e8ee"}, + {file = "asgiref-3.3.4.tar.gz", hash = "sha256:d1216dfbdfb63826470995d31caed36225dcaf34f182e0fa257a4dd9e86f1b78"}, +] +beautifulsoup4 = [ + {file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"}, + {file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"}, + {file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"}, +] +certifi = [ + {file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"}, + {file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"}, +] +chardet = [ + {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, + {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, +] +click = [ + {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"}, + {file = "click-8.0.1.tar.gz", hash = "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a"}, +] +colorama = [ + {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, + {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, +] +databases = [ + {file = "databases-0.4.3-py3-none-any.whl", hash = "sha256:f82b02c28fdddf7ffe7ee1945f5abef44d687ba97b9a1c81492c7f035d4c90e6"}, + {file = "databases-0.4.3.tar.gz", hash = "sha256:1521db7f6d3c581ff81b3552e130b27a13aefea2a57295e65738081831137afc"}, +] +h11 = [ + {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"}, + {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"}, +] +html5lib = [ + {file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"}, + {file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"}, +] +idna = [ + {file = "idna-2.10-py2.py3-none-any.whl", hash = 
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, + {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, +] +requests = [ + {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, + {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +soupsieve = [ + {file = "soupsieve-2.2.1-py3-none-any.whl", hash = "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"}, + {file = "soupsieve-2.2.1.tar.gz", hash = "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc"}, +] +sqlalchemy = [ + {file = "SQLAlchemy-1.3.24-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:87a2725ad7d41cd7376373c15fd8bf674e9c33ca56d0b8036add2d634dba372e"}, + {file = "SQLAlchemy-1.3.24-cp27-cp27m-win32.whl", hash = "sha256:f597a243b8550a3a0b15122b14e49d8a7e622ba1c9d29776af741f1845478d79"}, + {file = "SQLAlchemy-1.3.24-cp27-cp27m-win_amd64.whl", hash = "sha256:fc4cddb0b474b12ed7bdce6be1b9edc65352e8ce66bc10ff8cbbfb3d4047dbf4"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:f1149d6e5c49d069163e58a3196865e4321bad1803d7886e07d8710de392c548"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:14f0eb5db872c231b20c18b1e5806352723a3a89fb4254af3b3e14f22eaaec75"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:e98d09f487267f1e8d1179bf3b9d7709b30a916491997137dd24d6ae44d18d79"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:fc1f2a5a5963e2e73bac4926bdaf7790c4d7d77e8fc0590817880e22dd9d0b8b"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-win32.whl", hash = "sha256:f3c5c52f7cb8b84bfaaf22d82cb9e6e9a8297f7c2ed14d806a0f5e4d22e83fb7"}, + {file = "SQLAlchemy-1.3.24-cp35-cp35m-win_amd64.whl", hash = "sha256:0352db1befcbed2f9282e72843f1963860bf0e0472a4fa5cf8ee084318e0e6ab"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:2ed6343b625b16bcb63c5b10523fd15ed8934e1ed0f772c534985e9f5e73d894"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:34fcec18f6e4b24b4a5f6185205a04f1eab1e56f8f1d028a2a03694ebcc2ddd4"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:e47e257ba5934550d7235665eee6c911dc7178419b614ba9e1fbb1ce6325b14f"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:816de75418ea0953b5eb7b8a74933ee5a46719491cd2b16f718afc4b291a9658"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-win32.whl", hash = "sha256:26155ea7a243cbf23287f390dba13d7927ffa1586d3208e0e8d615d0c506f996"}, + {file = "SQLAlchemy-1.3.24-cp36-cp36m-win_amd64.whl", hash = "sha256:f03bd97650d2e42710fbe4cf8a59fae657f191df851fc9fc683ecef10746a375"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:a006d05d9aa052657ee3e4dc92544faae5fcbaafc6128217310945610d862d39"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1e2f89d2e5e3c7a88e25a3b0e43626dba8db2aa700253023b82e630d12b37109"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2010_x86_64.whl", hash = 
"sha256:0d5d862b1cfbec5028ce1ecac06a3b42bc7703eb80e4b53fceb2738724311443"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:0172423a27fbcae3751ef016663b72e1a516777de324a76e30efa170dbd3dd2d"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-win32.whl", hash = "sha256:d37843fb8df90376e9e91336724d78a32b988d3d20ab6656da4eb8ee3a45b63c"}, + {file = "SQLAlchemy-1.3.24-cp37-cp37m-win_amd64.whl", hash = "sha256:c10ff6112d119f82b1618b6dc28126798481b9355d8748b64b9b55051eb4f01b"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:861e459b0e97673af6cc5e7f597035c2e3acdfb2608132665406cded25ba64c7"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5de2464c254380d8a6c20a2746614d5a436260be1507491442cf1088e59430d2"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d375d8ccd3cebae8d90270f7aa8532fe05908f79e78ae489068f3b4eee5994e8"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:014ea143572fee1c18322b7908140ad23b3994036ef4c0d630110faf942652f8"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-win32.whl", hash = "sha256:6607ae6cd3a07f8a4c3198ffbf256c261661965742e2b5265a77cd5c679c9bba"}, + {file = "SQLAlchemy-1.3.24-cp38-cp38-win_amd64.whl", hash = "sha256:fcb251305fa24a490b6a9ee2180e5f8252915fb778d3dafc70f9cc3f863827b9"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01aa5f803db724447c1d423ed583e42bf5264c597fd55e4add4301f163b0be48"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d0e3515ef98aa4f0dc289ff2eebb0ece6260bbf37c2ea2022aad63797eacf60"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:bce28277f308db43a6b4965734366f533b3ff009571ec7ffa583cb77539b84d6"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:8110e6c414d3efc574543109ee618fe2c1f96fa31833a1ff36cc34e968c4f233"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-win32.whl", hash = "sha256:ee5f5188edb20a29c1cc4a039b074fdc5575337c9a68f3063449ab47757bb064"}, + {file = "SQLAlchemy-1.3.24-cp39-cp39-win_amd64.whl", hash = "sha256:09083c2487ca3c0865dc588e07aeaa25416da3d95f7482c07e92f47e080aa17b"}, + {file = "SQLAlchemy-1.3.24.tar.gz", hash = "sha256:ebbb777cbf9312359b897bf81ba00dae0f5cb69fba2a18265dcc18a6f5ef7519"}, +] +starlette = [ + {file = "starlette-0.14.2-py3-none-any.whl", hash = "sha256:3c8e48e52736b3161e34c9f0e8153b4f32ec5d8995a3ee1d59410d92f75162ed"}, + {file = "starlette-0.14.2.tar.gz", hash = "sha256:7d49f4a27f8742262ef1470608c59ddbc66baf37c148e938c7038e6bc7a998aa"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +typing-extensions = [ + {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"}, + {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"}, + {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"}, +] +ulid-py = [ + {file = "ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0"}, + {file = "ulid_py-1.1.0-py2.py3-none-any.whl", hash = 
"sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987"}, +] +urllib3 = [ + {file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"}, + {file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"}, +] +uvicorn = [ + {file = "uvicorn-0.14.0-py3-none-any.whl", hash = "sha256:2a76bb359171a504b3d1c853409af3adbfa5cef374a4a59e5881945a97a93eae"}, + {file = "uvicorn-0.14.0.tar.gz", hash = "sha256:45ad7dfaaa7d55cab4cd1e85e03f27e9d60bc067ddc59db52a2b0aeca8870292"}, +] +webencodings = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6067f29 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "unwind" +version = "0.1.0" +description = "" +authors = ["ducklet "] +license = "LOL" + +[tool.poetry.dependencies] +python = "^3.9" +requests = "^2.25.1" +beautifulsoup4 = "^4.9.3" +html5lib = "^1.1" +starlette = "^0.14.2" +ulid-py = "^1.1.0" +databases = {extras = ["sqlite"], version = "^0.4.3"} +toml = "^0.10.2" +uvicorn = "^0.14.0" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..7b7ed71 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,5 @@ +{ + "stubPath": "./stubs", + "venvPath": ".", + "venv": ".venv" +} diff --git a/run b/run new file mode 100755 index 0000000..69c6c59 --- /dev/null +++ b/run @@ -0,0 +1,15 @@ +#!/bin/sh -eu + +RUN_BIN=$(realpath "$0") +RUN_DIR=$(dirname "$RUN_BIN") + +export RUN_BIN +export RUN_DIR + +task="$1" +shift + +# export DEBUG=1 +# export UNWIND_LOGLEVEL=DEBUG + +exec scripts/"$task" "$@" diff --git a/scripts/app b/scripts/app new file mode 100755 index 0000000..2677f0f --- /dev/null +++ b/scripts/app @@ -0,0 +1,5 @@ +#!/bin/sh -eu + +[ -z "${DEBUG:-}" ] || set -x + +exec python -m unwind "$@" diff --git a/scripts/build b/scripts/build new file mode 100755 index 0000000..3be3536 --- /dev/null +++ b/scripts/build @@ -0,0 +1,7 @@ +#!/bin/sh -eu + +cd "$RUN_DIR" + +[ -z "${DEBUG:-}" ] || set -x + +exec poetry export -o requirements.txt diff --git a/scripts/dev b/scripts/dev new file mode 100755 index 0000000..5102be9 --- /dev/null +++ b/scripts/dev @@ -0,0 +1,5 @@ +#!/bin/sh -eu + +[ -z "${DEBUG:-}" ] || set -x + +exec uvicorn unwind:web_app --reload diff --git a/scripts/lint b/scripts/lint new file mode 100755 index 0000000..09b4184 --- /dev/null +++ b/scripts/lint @@ -0,0 +1,8 @@ +#!/bin/sh -eu + +cd "$RUN_DIR" + +[ -z "${DEBUG:-}" ] || set -x + +isort --profile black unwind +black unwind diff --git a/scripts/server b/scripts/server new file mode 100755 index 0000000..74580e8 --- /dev/null +++ b/scripts/server @@ -0,0 +1,5 @@ +#!/bin/sh -eu + +[ -z "${DEBUG:-}" ] || set -x + +exec uvicorn --host 0.0.0.0 unwind:web_app diff --git a/unwind/__init__.py b/unwind/__init__.py new file mode 100644 index 0000000..400fbe3 --- /dev/null +++ b/unwind/__init__.py @@ -0,0 +1 @@ +from .web import app as web_app diff --git a/unwind/__main__.py b/unwind/__main__.py new file mode 100644 index 0000000..0541b4a --- /dev/null +++ b/unwind/__main__.py @@ -0,0 +1,36 @@ +import 
asyncio +import logging + +from . import config +from .db import close_connection_pool, open_connection_pool +from .imdb import load_imdb +from .request import session + +log = logging.getLogger(__name__) + + +async def run_import(): + await open_connection_pool() + + with session() as s: + s.headers["Accept-Language"] = "en-GB, en;q=0.5" + + for name, imdb_id in config.imdb.items(): + log.info("Loading data for %s ... ⚡️", name) + await load_imdb(imdb_id) + + await close_connection_pool() + + +def main(): + logging.basicConfig( + format="%(asctime)s.%(msecs)03d [%(name)s:%(process)d] %(levelname)s: %(message)s", + datefmt="%H:%M:%S", + level=config.loglevel, + ) + log.debug(f"Log level: {config.loglevel}") + + asyncio.run(run_import()) + + +main() diff --git a/unwind/config.py b/unwind/config.py new file mode 100644 index 0000000..d13f59b --- /dev/null +++ b/unwind/config.py @@ -0,0 +1,14 @@ +import os +from pathlib import Path + +import toml + +cachedir = ( + Path(cachedir) if (cachedir := os.getenv("UNWIND_CACHEDIR", ".cache")) else None +) +debug = os.getenv("DEBUG") == "1" +loglevel = os.getenv("UNWIND_LOGLEVEL") or ("DEBUG" if debug else "INFO") +storage_path = os.getenv("UNWIND_STORAGE", "./data/db.sqlite") +config_path = os.getenv("UNWIND_CONFIG", "./data/config.toml") + +imdb = toml.load(config_path)["imdb"] diff --git a/unwind/db.py b/unwind/db.py new file mode 100644 index 0000000..03145ca --- /dev/null +++ b/unwind/db.py @@ -0,0 +1,185 @@ +import logging +from dataclasses import fields +from pathlib import Path +from typing import Optional, Type, TypeVar + +from databases import Database + +from . import config +from .models import Movie, Rating, User, asplain, fromplain, utcnow + +log = logging.getLogger(__name__) + +_shared_connection: Optional[Database] = None + + +async def open_connection_pool() -> None: + """Open the DB connection pool. + + This function needs to be called before any access to the database can happen. + """ + db = shared_connection() + await db.connect() + + await init_db(db) + + +async def close_connection_pool() -> None: + """Close the DB connection pool. + + This function should be called before the app shuts down to ensure all data + has been flushed to the database. + """ + db = shared_connection() + + # Run automatic ANALYZE prior to closing the db, + # see https://sqlite.com/lang_analyze.html. 
+ await db.execute("PRAGMA analysis_limit=400") + await db.execute("PRAGMA optimize") + + await db.disconnect() + + +def shared_connection() -> Database: + global _shared_connection + + if _shared_connection is None: + uri = f"sqlite:///{config.storage_path}" + _shared_connection = Database(uri) + + return _shared_connection + + +async def init_db(db): + sql = Path(__file__).with_name("init.sql").read_text() + async with db.transaction(): + for stmt in sql.split(";;"): + await db.execute(query=stmt) + + +async def add(item): + values = asplain(item) + keys = ", ".join(f"{k}" for k in values) + placeholders = ", ".join(f":{k}" for k in values) + query = f"INSERT INTO {item._table} ({keys}) VALUES ({placeholders})" + await shared_connection().execute(query=query, values=values) + + +ModelType = TypeVar("ModelType") + + +async def get(model: Type[ModelType], **kwds) -> Optional[ModelType]: + fields_ = ", ".join(f.name for f in fields(model)) + cond = " AND ".join(f"{k}=:{k}" for k in kwds) + query = f"SELECT {fields_} FROM {model._table} WHERE {cond}" + row = await shared_connection().fetch_one(query=query, values=kwds) + return fromplain(model, row) if row else None + + +async def update(item): + values = asplain(item) + keys = ", ".join(f"{k}=:{k}" for k in values if k != "id") + query = f"UPDATE {item._table} SET {keys} WHERE id=:id" + await shared_connection().execute(query=query, values=values) + + +async def add_or_update_user(user: User): + db_user = await get(User, imdb_id=user.imdb_id) + if not db_user: + await add(user) + else: + user.id = db_user.id + + if user != db_user: + await update(user) + + +async def add_or_update_movie(movie: Movie): + db_movie = await get(Movie, imdb_id=movie.imdb_id) + if not db_movie: + await add(movie) + else: + movie.id = db_movie.id + movie.updated = db_movie.updated + + if movie != db_movie: + movie.updated = utcnow() + await update(movie) + + +async def add_or_update_rating(rating: Rating) -> bool: + db_rating = await get( + Rating, movie_id=str(rating.movie_id), user_id=str(rating.user_id) + ) + + if not db_rating: + await add(rating) + return True + + else: + rating.id = db_rating.id + + if rating != db_rating: + await update(rating) + return True + + return False + + +def sql_escape(s: str, char="#"): + return s.replace(char, 2 * char).replace("%", f"{char}%").replace("_", f"{char}_") + + +async def find_ratings( + *, + imdb_movie_id: str = None, + title: str = None, + media_type: str = None, + ignore_tv_episodes: bool = False, + limit_rows=10, +): + values = { + "limit_rows": limit_rows, + } + + conditions = [] + if title: + values["escape"] = "#" + escaped_title = sql_escape(title, char=values["escape"]) + values["pattern"] = "%" + "%".join(escaped_title.split()) + "%" + conditions.append("movies.title LIKE :pattern ESCAPE :escape") + + if media_type: + values["media_type"] = media_type + conditions.append("movies.media_type=:media_type") + + if ignore_tv_episodes: + conditions.append("movies.media_type!='TV Episode'") + + query = f""" + WITH newest_movies + AS ( + SELECT DISTINCT ratings.movie_id + FROM ratings + LEFT JOIN movies ON movies.id=ratings.movie_id + {('WHERE ' + ' AND '.join(conditions)) if conditions else ''} + ORDER BY length(movies.title) ASC, ratings.rating_date DESC + LIMIT :limit_rows + ) + + SELECT + users.name AS user_name, + ratings.score AS user_score, + movies.score AS imdb_score, + movies.imdb_id AS movie_imdb_id, + movies.media_type AS media_type, + movies.title AS movie_title, + movies.release_year AS release_year + 
FROM newest_movies + LEFT JOIN ratings ON ratings.movie_id=newest_movies.movie_id + LEFT JOIN users ON users.id=ratings.user_id + LEFT JOIN movies ON movies.id=ratings.movie_id + """ + + rows = await shared_connection().fetch_all(query=query, values=values) + return tuple(dict(r) for r in rows) diff --git a/unwind/imdb.py b/unwind/imdb.py new file mode 100644 index 0000000..17e792d --- /dev/null +++ b/unwind/imdb.py @@ -0,0 +1,132 @@ +import logging +import re +from collections import namedtuple +from datetime import datetime +from typing import Optional +from urllib.parse import urljoin + +from .db import add_or_update_movie, add_or_update_rating, add_or_update_user +from .models import Movie, Rating, User, asplain, fromplain +from .request import soup_from_url + +log = logging.getLogger(__name__) + +# div#ratings-container +# div.lister-item.mode-detail +# div.lister-item-content +# h3.lister-item-header +# a +# [href] +# .text +# span.lister-item-year.text +# br +# a +# [href] +# .text +# span.lister-item-year.text +# span.runtime.text +# span.genre.text +# div.ipl-rating-widget +# div.ipl-rating-star.small +# span.ipl-rating-star__rating.text +# div.ipl-rating-star.ipl-rating-star--other-user.small +# span.ipl-rating-star__rating.text +# p.text-muted.text ("Rated on 06 May 2021") + + +def imdb_url(user_id): + return f"https://www.imdb.com/user/{user_id}/ratings" + + +find_name = re.compile(r"(?P.*)'s Ratings").fullmatch +find_rating_date = re.compile(r"Rated on (?P\d{2} \w+ \d{4})").fullmatch +find_runtime = re.compile(r"((?P\d+) hr)? ?((?P\d+) min)?").fullmatch +# find_year = re.compile( +# r"(\([IVX]+\) )?\((?P\d{4})(–( |\d{4})| TV (Special|Movie)| Video)?\)" +# ).fullmatch +find_year = re.compile( + r"(\([IVX]+\) )?\((?P\d{4})(–( |\d{4})| (?P[^)]+))?\)" +).fullmatch +find_movie_id = re.compile(r"/title/(?Ptt\d+)/").search + + +async def parse_page(url, stop_on_dupe=True) -> Optional[str]: + soup = soup_from_url(url) + + user = User(imdb_id=soup.find("meta", property="pageId")["content"], name="") + if match := find_name(soup.h1.string): + user.name = match["name"] + await add_or_update_user(user) + + items = soup.find_all("div", "lister-item-content") + for i, item in enumerate(items): + + movie = Movie( + title=item.h3.a.string.strip(), + genres=set(s.strip() for s in item.find("span", "genre").string.split(",")), + ) + + episode_br = item.h3.br + if episode_br: + episode_a = episode_br.find_next("a") + if not episode_a: + log.error("Unknown document structure.") + continue + + movie.media_type = "TV Episode" + movie.title += " / " + episode_a.string.strip() + if match := find_year( + episode_br.find_next("span", "lister-item-year").string + ): + movie.release_year = int(match["year"]) + if match := find_movie_id(episode_a["href"]): + movie.imdb_id = match["id"] + + rating = Rating(user_id=user.id) + + if (tag := item.find("span", "runtime")) and ( + match := find_runtime(tag.string) + ): + movie.runtime = int(match["h"] or 0) * 60 + int(match["m"] or 0) + + if not episode_br: + if match := find_year(item.h3.find("span", "lister-item-year").string): + if media_type := match["type"]: + movie.media_type = media_type.strip() + movie.release_year = int(match["year"]) + if match := find_movie_id(item.h3.a["href"]): + movie.imdb_id = match["id"] + + ratings_item = item.find("div", "ipl-rating-widget") + if match := find_rating_date(ratings_item.find_next("p", "text-muted").string): + rating.rating_date = datetime.strptime(match["date"], "%d %b %Y") + for rating_item in 
ratings_item.find_all("span", "ipl-rating-star__rating")[:2]: + if "ipl-rating-star--other-user" in rating_item.parent["class"]: + rating.score = int(float(rating_item.string) * 10) + else: + movie.score = int(float(rating_item.string) * 10) + + if not movie.media_type: + movie.media_type = "Movie" + + await add_or_update_movie(movie) + + rating.movie_id = movie.id # needs to be set _after_ movie has been updated + is_updated = await add_or_update_rating(rating) + + if stop_on_dupe and not is_updated: + log.info("Import stopped after %s items. Caught up to known state. ✋", i) + return None + + next_url = urljoin( + url, soup.find("div", "footer").find(string=re.compile(r"Next")).parent["href"] + ) + + return next_url if url != next_url else None + + +async def load_imdb(user_id): + next_url = imdb_url(user_id) + + while next_url := await parse_page(next_url): + pass diff --git a/unwind/init.sql b/unwind/init.sql new file mode 100644 index 0000000..d0bd446 --- /dev/null +++ b/unwind/init.sql @@ -0,0 +1,36 @@ +PRAGMA foreign_keys = ON;; + +CREATE TABLE IF NOT EXISTS users ( + id TEXT NOT NULL PRIMARY KEY, + imdb_id TEXT NOT NULL UNIQUE, + name TEXT NOT NULL +);; + +CREATE TABLE IF NOT EXISTS movies ( + id TEXT NOT NULL PRIMARY KEY, + title TEXT NOT NULL, + release_year NUMBER NOT NULL, + media_type TEXT NOT NULL, + imdb_id TEXT NOT NULL UNIQUE, + score NUMBER NOT NULL, + runtime NUMBER, + genres TEXT NOT NULL, + updated TEXT NOT NULL +);; + +CREATE TABLE IF NOT EXISTS ratings ( + id TEXT NOT NULL PRIMARY KEY, + movie_id TEXT NOT NULL, + user_id TEXT NOT NULL, + score NUMBER NOT NULL, + rating_date TEXT NOT NULL, + favorite NUMBER, + finished NUMBER, + FOREIGN KEY(movie_id) REFERENCES movies(id), + FOREIGN KEY(user_id) REFERENCES users(id) +);; + +CREATE UNIQUE INDEX IF NOT EXISTS ratings_index ON ratings ( + movie_id, + user_id +);; diff --git a/unwind/models.py b/unwind/models.py new file mode 100644 index 0000000..f44cfe4 --- /dev/null +++ b/unwind/models.py @@ -0,0 +1,139 @@ +import json +from dataclasses import asdict, dataclass, field, fields, is_dataclass +from datetime import datetime, timezone +from typing import Any, ClassVar, Optional, Type, Union, get_args, get_origin + +from .types import ULID + + +def is_optional(tp: Type): + if get_origin(tp) is not Union: + return False + + args = get_args(tp) + return len(args) == 2 and type(None) in args + + +def optional_type(tp: Type): + if get_origin(tp) is not Union: + return None + + args = get_args(tp) + if len(args) != 2 or args[1] is not type(None): + return None + + return args[0] + + +def asplain(o) -> dict[str, Any]: + validate(o) + + d = asdict(o) + for f in fields(o): + + target = f.type + # XXX this doesn't properly support any kind of nested types + if (otype := optional_type(f.type)) is not None: + target = otype + if (otype := get_origin(target)) is not None: + target = otype + + v = d[f.name] + if target is ULID: + d[f.name] = str(v) + elif target in {datetime}: + d[f.name] = v.isoformat() + elif target in {set}: + d[f.name] = json.dumps(list(sorted(v))) + elif target in {list}: + d[f.name] = json.dumps(list(v)) + elif target in {bool, str, int, float, None}: + pass + else: + raise ValueError(f"Unsupported value type: {f.name}: {type(v)}") + + return d + + +def fromplain(cls, d: dict[str, Any]): + # if not is_dataclass(cls): + # raise TypeError(f'Not a dataclass: {type(cls)}') + + dd = {} + for f in fields(cls): + + target = f.type + otype = optional_type(f.type) + is_opt = otype is not None + if is_opt: + target = otype + 
if (xtype := get_origin(target)) is not None: + target = xtype + + v = d[f.name] + if is_opt and v is None: + dd[f.name] = v + elif isinstance(v, target): + dd[f.name] = v + elif target in {set, list}: + dd[f.name] = target(json.loads(v)) + elif target in {datetime}: + dd[f.name] = target.fromisoformat(v) + else: + dd[f.name] = target(v) + + o = cls(**dd) + validate(o) + return o + + +def validate(o): + for f in fields(o): + vtype = type(getattr(o, f.name)) + if vtype is not f.type: + if get_origin(f.type) is vtype or ( + get_origin(f.type) is Union and vtype in get_args(f.type) + ): + continue + raise ValueError(f"Invalid value type: {f.name}: {vtype}") + + +def utcnow(): + return datetime.now(timezone.utc) + + +@dataclass +class Movie: + _table: ClassVar[str] = "movies" + + id: ULID = field(default_factory=ULID) + title: str = None # canonical title + release_year: int = None # canonical release year + media_type: Optional[str] = None + imdb_id: str = None + score: int = None # range: [0,100] + runtime: Optional[int] = None # minutes + genres: set[str] = None + updated: datetime = field(default_factory=utcnow) + + +@dataclass +class Rating: + _table: ClassVar[str] = "ratings" + + id: ULID = field(default_factory=ULID) + movie_id: ULID = None + user_id: ULID = None + score: int = None # range: [0,100] + rating_date: datetime = None + favorite: Optional[bool] = None + finished: Optional[bool] = None + + +@dataclass +class User: + _table: ClassVar[str] = "users" + + id: ULID = field(default_factory=ULID) + imdb_id: str = None + name: str = None # canonical user name diff --git a/unwind/request.py b/unwind/request.py new file mode 100644 index 0000000..b517f58 --- /dev/null +++ b/unwind/request.py @@ -0,0 +1,185 @@ +import json +import logging +from collections import deque +from contextlib import contextmanager +from dataclasses import dataclass +from functools import wraps +from hashlib import md5 +from random import random +from time import sleep, time +from typing import Callable, Optional + +import bs4 +import requests +from urllib3.util.retry import Retry + +from . import config + +log = logging.getLogger(__name__) + +if config.debug and config.cachedir: + config.cachedir.mkdir(exist_ok=True) + + +def set_retries(s: requests.Session, n: int, backoff_factor: float = 0.2): + retry = ( + Retry( + total=n, + connect=n, + read=n, + status=n, + status_forcelist=Retry.RETRY_AFTER_STATUS_CODES, + backoff_factor=backoff_factor, + ) + if n + else Retry(0, read=False) + ) + for a in s.adapters.values(): + a.max_retries = retry + + +_shared_session = None + + +@contextmanager +def session(): + global _shared_session + + if _shared_session: + yield _shared_session + return + + _shared_session = Session() + try: + yield _shared_session + finally: + _shared_session = None + + +def Session() -> requests.Session: + s = requests.Session() + s.headers["User-Agent"] = "Mozilla/5.0 Gecko/20100101 unwind/20210506" + return s + + +def throttle( + times: int, per_seconds: float, jitter: Optional[Callable[[], float]] = None +) -> Callable[[Callable], Callable]: + + calls: deque[float] = deque(maxlen=times) + + if jitter is None: + jitter = lambda: 0.0 + + def decorator(func: Callable) -> Callable: + @wraps(func) + def inner(*args, **kwds): + + # clean up + while calls: + if calls[0] + per_seconds > time(): + break + calls.popleft() + + # wait + if len(calls) == calls.maxlen: + wait_until = calls.popleft() + per_seconds + jitter() + timeout = wait_until - time() + log.debug(f"waiting {timeout:.2} seconds ... 
⏳") + sleep(timeout) + + # call + calls.append(time()) + try: + r = func(*args, **kwds) + except Exception as e: + if getattr(e, "is_cached", False): + calls.pop() + raise + if getattr(r, "is_cached", False): + calls.pop() + + return r + + return inner + + return decorator + + +class CachedStr(str): + is_cached = True + + +@dataclass +class CachedResponse: + is_cached = True + status_code: int + text: str + url: str + headers: dict[str, str] = None + + def json(self): + return json.loads(self.text) + + +class RedirectError(RuntimeError): + def __init__(self, from_url: str, to_url: str, is_cached=False): + self.from_url = from_url + self.to_url = to_url + self.is_cached = is_cached + super().__init__(f"Redirected: {from_url} -> {to_url}") + + +@throttle(1, 1, random) +def http_get(s: requests.Session, url: str, *args, **kwds) -> requests.Response: + + req = s.prepare_request(requests.Request("GET", url, *args, **kwds)) + + if config.debug and config.cachedir: + sig = repr(req.url) # + repr(sorted(req.headers.items())) + cachefile = config.cachedir / md5(sig.encode()).hexdigest() + else: + cachefile = None + + if cachefile: + if cachefile.exists(): + log.debug( + f"loading {req.url} ({req.headers!r}) from cache {cachefile} ... 💾" + ) + with cachefile.open() as fp: + resp = CachedResponse(**json.load(fp)) + if 300 <= resp.status_code <= 399: + raise RedirectError( + from_url=resp.url, to_url=resp.headers["location"], is_cached=True + ) + return resp + + log.debug(f"loading {req.url} ({req.headers!r}) ... ⚡️") + resp = s.send(req, allow_redirects=False, stream=True) + resp.raise_for_status() + + if cachefile: + with cachefile.open("w") as fp: + json.dump( + { + "status_code": resp.status_code, + "text": resp.text, + "url": resp.url, + "headers": dict(resp.headers), + }, + fp, + ) + + if resp.is_redirect: + # Redirects could mean trouble, we need to stay on top of that! + raise RedirectError(from_url=resp.url, to_url=resp.headers["location"]) + + return resp + + +def soup_from_url(url): + with session() as s: + r = http_get(s, url) + + soup = bs4.BeautifulSoup(r.text, "html5lib") + return soup diff --git a/unwind/types.py b/unwind/types.py new file mode 100644 index 0000000..a54e0ec --- /dev/null +++ b/unwind/types.py @@ -0,0 +1,31 @@ +import re +from typing import Union, cast + +import ulid +from ulid.hints import Buffer + + +class ULID(ulid.ULID): + """Extended ULID type. + + Same as ulid.ULID, but allows initializing without a buffer, to make + it easier to use the class as a standard factory. + + For more information about ULIDs, see https://github.com/ulid/spec. + """ + + _pattern = re.compile(r"^[0-9A-HJKMNP-TV-Z]{26}$") + + def __init__(self, buffer: Union[Buffer, ulid.ULID, str, None] = None): + if isinstance(buffer, str): + if not self._pattern.search(buffer): + raise ValueError("Invalid ULID.") + buffer = ulid.from_str(buffer) + assert isinstance(buffer, ulid.ULID) + + if isinstance(buffer, ulid.ULID): + buffer = cast(memoryview, buffer.memory) + elif buffer is None: + buffer = cast(memoryview, ulid.new().memory) + + super().__init__(buffer) diff --git a/unwind/web.py b/unwind/web.py new file mode 100644 index 0000000..474d169 --- /dev/null +++ b/unwind/web.py @@ -0,0 +1,53 @@ +from collections import defaultdict + +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route + +from . 
import config +from .db import close_connection_pool, find_ratings, open_connection_pool + + +def imdb_url(imdb_id: str): + return f"https://www.imdb.com/title/{imdb_id}/" + + +def truthy(s: str): + return bool(s) and s.lower() in {"1", "yes", "true"} + + +async def ratings(request): + title = request.query_params.get("title") + media_type = request.query_params.get("media_type") + ignore_tv_episodes = truthy(request.query_params.get("ignore_tv_episodes")) + rows = await find_ratings( + title=title, media_type=media_type, ignore_tv_episodes=ignore_tv_episodes + ) + + aggr = {} + for r in rows: + mov = aggr.setdefault( + r["movie_imdb_id"], + { + "title": r["movie_title"], + "year": r["release_year"], + "link": imdb_url(r["movie_imdb_id"]), + "user_scores": [], + "imdb_score": r["imdb_score"], + "media_type": r["media_type"], + }, + ) + mov["user_scores"].append(r["user_score"]) + + resp = tuple(aggr.values()) + + return JSONResponse(resp) + + +app = Starlette( + on_startup=[open_connection_pool], + on_shutdown=[close_connection_pool], + routes=[ + Route("/ratings", ratings), + ], +)
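Once a few users have been imported, `./run server` (or `scripts/dev` for auto-reload) serves the aggregated data through the single `/ratings` route defined above. A minimal client sketch; the host and port are assumptions (uvicorn's defaults, not pinned anywhere in this patch), and the search term is arbitrary:

```python
# Query the /ratings endpoint from unwind/web.py. The `title` words become a
# SQL LIKE pattern (see find_ratings in unwind/db.py), and boolean query
# flags accept "1", "yes" or "true".
import requests

resp = requests.get(
    "http://127.0.0.1:8000/ratings",
    params={"title": "matrix", "ignore_tv_episodes": "yes"},
)
resp.raise_for_status()

for movie in resp.json():
    # One entry per movie: every stored user score alongside IMDb's own
    # score, both on the 0-100 scale used throughout the models.
    print(movie["title"], movie["year"], movie["user_scores"], movie["imdb_score"])
```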