init with some kind of working prototype
commit b5cb22822e
22 changed files with 1292 additions and 0 deletions
.gitignore (vendored, new file, 4 lines)
@@ -0,0 +1,4 @@
*.pyc
/.cache
/data/*
/requirements.txt

Dockerfile (new file, 19 lines)
@@ -0,0 +1,19 @@
FROM docker.io/library/python:3.9-alpine

RUN apk update --no-cache \
    && apk upgrade --no-cache \
    && pip install --no-cache-dir --upgrade pip

RUN addgroup -g 10001 py \
    && adduser -D -u 10000 -G py py

WORKDIR /var/app

COPY requirements.txt ./

RUN pip install --no-cache-dir --upgrade --requirement requirements.txt

USER 10000:10001

ENTRYPOINT ["/var/app/run"]
CMD ["server"]

README.md (new file, 6 lines)
@@ -0,0 +1,6 @@
# Unwind

A cache & aggregator for user ratings from IMDb and other sources.

Users are spread across many platforms, and most platforms allow very limited access to their data.
This service aims to offer one interface for all that data and open up access.

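Once the server is up (for example via `./run dev`, which starts uvicorn on its default bind of 127.0.0.1:8000), the aggregated data is one GET away. A minimal client sketch; host, port, and the sample title are assumptions, while the query parameters and response keys come from unwind/web.py below:

import requests

resp = requests.get(
    "http://127.0.0.1:8000/ratings",
    params={"title": "alien", "ignore_tv_episodes": "yes"},  # sample query
)
for movie in resp.json():
    # each entry aggregates all user scores for one movie
    print(movie["title"], movie["year"], movie["user_scores"], movie["link"])
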
poetry.lock (generated, new file, 378 lines)
@@ -0,0 +1,378 @@
[[package]]
name = "aiosqlite"
version = "0.17.0"
description = "asyncio bridge to the standard sqlite3 module"
category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
typing_extensions = ">=3.7.2"

[[package]]
name = "asgiref"
version = "3.3.4"
description = "ASGI specs, helper code, and adapters"
category = "main"
optional = false
python-versions = ">=3.6"

[package.extras]
tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"]

[[package]]
name = "beautifulsoup4"
version = "4.9.3"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = "*"

[package.dependencies]
soupsieve = {version = ">1.2", markers = "python_version >= \"3.0\""}

[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]

[[package]]
name = "certifi"
version = "2020.12.5"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = "*"

[[package]]
name = "chardet"
version = "4.0.0"
description = "Universal encoding detector for Python 2 and 3"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

[[package]]
name = "click"
version = "8.0.1"
description = "Composable command line interface toolkit"
category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}

[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

[[package]]
name = "databases"
version = "0.4.3"
description = "Async database support for Python."
category = "main"
optional = false
python-versions = ">=3.6"

[package.dependencies]
aiosqlite = {version = "*", optional = true, markers = "extra == \"sqlite\""}
sqlalchemy = "<1.4"

[package.extras]
mysql = ["aiomysql"]
postgresql = ["asyncpg"]
postgresql_aiopg = ["aiopg"]
sqlite = ["aiosqlite"]

[[package]]
name = "h11"
version = "0.12.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
category = "main"
optional = false
python-versions = ">=3.6"

[[package]]
name = "html5lib"
version = "1.1"
description = "HTML parser based on the WHATWG HTML specification"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

[package.dependencies]
six = ">=1.9"
webencodings = "*"

[package.extras]
all = ["genshi", "chardet (>=2.2)", "lxml"]
chardet = ["chardet (>=2.2)"]
genshi = ["genshi"]
lxml = ["lxml"]

[[package]]
name = "idna"
version = "2.10"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

[[package]]
name = "requests"
version = "2.25.1"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"

[package.dependencies]
certifi = ">=2017.4.17"
chardet = ">=3.0.2,<5"
idna = ">=2.5,<3"
urllib3 = ">=1.21.1,<1.27"

[package.extras]
security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]

[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"

[[package]]
name = "soupsieve"
version = "2.2.1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.6"

[[package]]
name = "sqlalchemy"
version = "1.3.24"
description = "Database Abstraction Library"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

[package.extras]
mssql = ["pyodbc"]
mssql_pymssql = ["pymssql"]
mssql_pyodbc = ["pyodbc"]
mysql = ["mysqlclient"]
oracle = ["cx-oracle"]
postgresql = ["psycopg2"]
postgresql_pg8000 = ["pg8000 (<1.16.6)"]
postgresql_psycopg2binary = ["psycopg2-binary"]
postgresql_psycopg2cffi = ["psycopg2cffi"]
pymysql = ["pymysql (<1)", "pymysql"]

[[package]]
name = "starlette"
version = "0.14.2"
description = "The little ASGI library that shines."
category = "main"
optional = false
python-versions = ">=3.6"

[package.extras]
full = ["aiofiles", "graphene", "itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests"]

[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "main"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"

[[package]]
name = "typing-extensions"
version = "3.10.0.0"
description = "Backported and Experimental Type Hints for Python 3.5+"
category = "main"
optional = false
python-versions = "*"

[[package]]
name = "ulid-py"
version = "1.1.0"
description = "Universally Unique Lexicographically Sortable Identifier"
category = "main"
optional = false
python-versions = "*"

[[package]]
name = "urllib3"
version = "1.26.4"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"

[package.extras]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
brotli = ["brotlipy (>=0.6.0)"]

[[package]]
name = "uvicorn"
version = "0.14.0"
description = "The lightning-fast ASGI server."
category = "main"
optional = false
python-versions = "*"

[package.dependencies]
asgiref = ">=3.3.4"
click = ">=7"
h11 = ">=0.8"

[package.extras]
standard = ["websockets (>=9.1)", "httptools (>=0.2.0,<0.3.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"]

[[package]]
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
category = "main"
optional = false
python-versions = "*"

[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "28c14ec611e61db259fa6aa160df99308f7452874f69377a634d07cd379603c8"

[metadata.files]
aiosqlite = [
    {file = "aiosqlite-0.17.0-py3-none-any.whl", hash = "sha256:6c49dc6d3405929b1d08eeccc72306d3677503cc5e5e43771efc1e00232e8231"},
    {file = "aiosqlite-0.17.0.tar.gz", hash = "sha256:f0e6acc24bc4864149267ac82fb46dfb3be4455f99fe21df82609cc6e6baee51"},
]
asgiref = [
    {file = "asgiref-3.3.4-py3-none-any.whl", hash = "sha256:92906c611ce6c967347bbfea733f13d6313901d54dcca88195eaeb52b2a8e8ee"},
    {file = "asgiref-3.3.4.tar.gz", hash = "sha256:d1216dfbdfb63826470995d31caed36225dcaf34f182e0fa257a4dd9e86f1b78"},
]
beautifulsoup4 = [
    {file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"},
    {file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"},
    {file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"},
]
certifi = [
    {file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"},
    {file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"},
]
chardet = [
    {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
    {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
]
click = [
    {file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"},
    {file = "click-8.0.1.tar.gz", hash = "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a"},
]
colorama = [
    {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
    {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
databases = [
    {file = "databases-0.4.3-py3-none-any.whl", hash = "sha256:f82b02c28fdddf7ffe7ee1945f5abef44d687ba97b9a1c81492c7f035d4c90e6"},
    {file = "databases-0.4.3.tar.gz", hash = "sha256:1521db7f6d3c581ff81b3552e130b27a13aefea2a57295e65738081831137afc"},
]
h11 = [
    {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"},
    {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"},
]
html5lib = [
    {file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"},
    {file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"},
]
idna = [
    {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
    {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
]
requests = [
    {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"},
    {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"},
]
six = [
    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
soupsieve = [
    {file = "soupsieve-2.2.1-py3-none-any.whl", hash = "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"},
    {file = "soupsieve-2.2.1.tar.gz", hash = "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc"},
]
sqlalchemy = [
    {file = "SQLAlchemy-1.3.24-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:87a2725ad7d41cd7376373c15fd8bf674e9c33ca56d0b8036add2d634dba372e"},
    {file = "SQLAlchemy-1.3.24-cp27-cp27m-win32.whl", hash = "sha256:f597a243b8550a3a0b15122b14e49d8a7e622ba1c9d29776af741f1845478d79"},
    {file = "SQLAlchemy-1.3.24-cp27-cp27m-win_amd64.whl", hash = "sha256:fc4cddb0b474b12ed7bdce6be1b9edc65352e8ce66bc10ff8cbbfb3d4047dbf4"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:f1149d6e5c49d069163e58a3196865e4321bad1803d7886e07d8710de392c548"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:14f0eb5db872c231b20c18b1e5806352723a3a89fb4254af3b3e14f22eaaec75"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:e98d09f487267f1e8d1179bf3b9d7709b30a916491997137dd24d6ae44d18d79"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:fc1f2a5a5963e2e73bac4926bdaf7790c4d7d77e8fc0590817880e22dd9d0b8b"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-win32.whl", hash = "sha256:f3c5c52f7cb8b84bfaaf22d82cb9e6e9a8297f7c2ed14d806a0f5e4d22e83fb7"},
    {file = "SQLAlchemy-1.3.24-cp35-cp35m-win_amd64.whl", hash = "sha256:0352db1befcbed2f9282e72843f1963860bf0e0472a4fa5cf8ee084318e0e6ab"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:2ed6343b625b16bcb63c5b10523fd15ed8934e1ed0f772c534985e9f5e73d894"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:34fcec18f6e4b24b4a5f6185205a04f1eab1e56f8f1d028a2a03694ebcc2ddd4"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:e47e257ba5934550d7235665eee6c911dc7178419b614ba9e1fbb1ce6325b14f"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:816de75418ea0953b5eb7b8a74933ee5a46719491cd2b16f718afc4b291a9658"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-win32.whl", hash = "sha256:26155ea7a243cbf23287f390dba13d7927ffa1586d3208e0e8d615d0c506f996"},
    {file = "SQLAlchemy-1.3.24-cp36-cp36m-win_amd64.whl", hash = "sha256:f03bd97650d2e42710fbe4cf8a59fae657f191df851fc9fc683ecef10746a375"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:a006d05d9aa052657ee3e4dc92544faae5fcbaafc6128217310945610d862d39"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1e2f89d2e5e3c7a88e25a3b0e43626dba8db2aa700253023b82e630d12b37109"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:0d5d862b1cfbec5028ce1ecac06a3b42bc7703eb80e4b53fceb2738724311443"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:0172423a27fbcae3751ef016663b72e1a516777de324a76e30efa170dbd3dd2d"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-win32.whl", hash = "sha256:d37843fb8df90376e9e91336724d78a32b988d3d20ab6656da4eb8ee3a45b63c"},
    {file = "SQLAlchemy-1.3.24-cp37-cp37m-win_amd64.whl", hash = "sha256:c10ff6112d119f82b1618b6dc28126798481b9355d8748b64b9b55051eb4f01b"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:861e459b0e97673af6cc5e7f597035c2e3acdfb2608132665406cded25ba64c7"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5de2464c254380d8a6c20a2746614d5a436260be1507491442cf1088e59430d2"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d375d8ccd3cebae8d90270f7aa8532fe05908f79e78ae489068f3b4eee5994e8"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:014ea143572fee1c18322b7908140ad23b3994036ef4c0d630110faf942652f8"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-win32.whl", hash = "sha256:6607ae6cd3a07f8a4c3198ffbf256c261661965742e2b5265a77cd5c679c9bba"},
    {file = "SQLAlchemy-1.3.24-cp38-cp38-win_amd64.whl", hash = "sha256:fcb251305fa24a490b6a9ee2180e5f8252915fb778d3dafc70f9cc3f863827b9"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01aa5f803db724447c1d423ed583e42bf5264c597fd55e4add4301f163b0be48"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d0e3515ef98aa4f0dc289ff2eebb0ece6260bbf37c2ea2022aad63797eacf60"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:bce28277f308db43a6b4965734366f533b3ff009571ec7ffa583cb77539b84d6"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:8110e6c414d3efc574543109ee618fe2c1f96fa31833a1ff36cc34e968c4f233"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-win32.whl", hash = "sha256:ee5f5188edb20a29c1cc4a039b074fdc5575337c9a68f3063449ab47757bb064"},
    {file = "SQLAlchemy-1.3.24-cp39-cp39-win_amd64.whl", hash = "sha256:09083c2487ca3c0865dc588e07aeaa25416da3d95f7482c07e92f47e080aa17b"},
    {file = "SQLAlchemy-1.3.24.tar.gz", hash = "sha256:ebbb777cbf9312359b897bf81ba00dae0f5cb69fba2a18265dcc18a6f5ef7519"},
]
starlette = [
    {file = "starlette-0.14.2-py3-none-any.whl", hash = "sha256:3c8e48e52736b3161e34c9f0e8153b4f32ec5d8995a3ee1d59410d92f75162ed"},
    {file = "starlette-0.14.2.tar.gz", hash = "sha256:7d49f4a27f8742262ef1470608c59ddbc66baf37c148e938c7038e6bc7a998aa"},
]
toml = [
    {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
typing-extensions = [
    {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"},
    {file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"},
    {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
]
ulid-py = [
    {file = "ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0"},
    {file = "ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987"},
]
urllib3 = [
    {file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"},
    {file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"},
]
uvicorn = [
    {file = "uvicorn-0.14.0-py3-none-any.whl", hash = "sha256:2a76bb359171a504b3d1c853409af3adbfa5cef374a4a59e5881945a97a93eae"},
    {file = "uvicorn-0.14.0.tar.gz", hash = "sha256:45ad7dfaaa7d55cab4cd1e85e03f27e9d60bc067ddc59db52a2b0aeca8870292"},
]
webencodings = [
    {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
    {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
]

pyproject.toml (new file, 23 lines)
@@ -0,0 +1,23 @@
[tool.poetry]
name = "unwind"
version = "0.1.0"
description = ""
authors = ["ducklet <ducklet@noreply.code.dumpr.org>"]
license = "LOL"

[tool.poetry.dependencies]
python = "^3.9"
requests = "^2.25.1"
beautifulsoup4 = "^4.9.3"
html5lib = "^1.1"
starlette = "^0.14.2"
ulid-py = "^1.1.0"
databases = {extras = ["sqlite"], version = "^0.4.3"}
toml = "^0.10.2"
uvicorn = "^0.14.0"

[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

pyrightconfig.json (new file, 5 lines)
@@ -0,0 +1,5 @@
{
  "stubPath": "./stubs",
  "venvPath": ".",
  "venv": ".venv"
}

run (new executable file, 15 lines)
@@ -0,0 +1,15 @@
#!/bin/sh -eu

RUN_BIN=$(realpath "$0")
RUN_DIR=$(dirname "$RUN_BIN")

export RUN_BIN
export RUN_DIR

task="$1"
shift

# export DEBUG=1
# export UNWIND_LOGLEVEL=DEBUG

exec scripts/"$task" "$@"

scripts/app (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec python -m unwind "$@"

scripts/build (new executable file, 7 lines)
@@ -0,0 +1,7 @@
#!/bin/sh -eu

cd "$RUN_DIR"

[ -z "${DEBUG:-}" ] || set -x

exec poetry export -o requirements.txt

scripts/dev (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec uvicorn unwind:web_app --reload

scripts/lint (new executable file, 8 lines)
@@ -0,0 +1,8 @@
#!/bin/sh -eu

cd "$RUN_DIR"

[ -z "${DEBUG:-}" ] || set -x

isort --profile black unwind
black unwind

scripts/server (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec uvicorn --host 0.0.0.0 unwind:web_app

unwind/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .web import app as web_app

unwind/__main__.py (new file, 36 lines)
@@ -0,0 +1,36 @@
import asyncio
import logging

from . import config
from .db import close_connection_pool, open_connection_pool
from .imdb import load_imdb
from .request import session

log = logging.getLogger(__name__)


async def run_import():
    await open_connection_pool()

    with session() as s:
        s.headers["Accept-Language"] = "en-GB, en;q=0.5"

        for name, imdb_id in config.imdb.items():
            log.info("Loading data for %s ... ⚡️", name)
            await load_imdb(imdb_id)

    await close_connection_pool()


def main():
    logging.basicConfig(
        format="%(asctime)s.%(msecs)03d [%(name)s:%(process)d] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
        level=config.loglevel,
    )
    log.debug(f"Log level: {config.loglevel}")

    asyncio.run(run_import())


main()

unwind/config.py (new file, 14 lines)
@@ -0,0 +1,14 @@
import os
from pathlib import Path

import toml

cachedir = (
    Path(cachedir) if (cachedir := os.getenv("UNWIND_CACHEDIR", ".cache")) else None
)
debug = os.getenv("DEBUG") == "1"
loglevel = os.getenv("UNWIND_LOGLEVEL") or ("DEBUG" if debug else "INFO")
storage_path = os.getenv("UNWIND_STORAGE", "./data/db.sqlite")
config_path = os.getenv("UNWIND_CONFIG", "./data/config.toml")

imdb = toml.load(config_path)["imdb"]

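`imdb` ends up as a plain dict of display name to IMDb user id, which `run_import` in unwind/__main__.py iterates and `imdb_url` in unwind/imdb.py turns into a ratings URL. A sketch of the shape data/config.toml is therefore expected to have; the names and ids below are made up:

import toml

# Hypothetical contents of data/config.toml: the [imdb] table maps a
# label to the user id from an imdb.com/user/<id>/ratings URL.
sample = toml.loads('[imdb]\nalice = "ur0000001"\nbob = "ur0000002"\n')
print(list(sample["imdb"].items()))
# [('alice', 'ur0000001'), ('bob', 'ur0000002')]
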
unwind/db.py (new file, 185 lines)
@@ -0,0 +1,185 @@
import logging
from dataclasses import fields
from pathlib import Path
from typing import Optional, Type, TypeVar

from databases import Database

from . import config
from .models import Movie, Rating, User, asplain, fromplain, utcnow

log = logging.getLogger(__name__)

_shared_connection: Optional[Database] = None


async def open_connection_pool() -> None:
    """Open the DB connection pool.

    This function needs to be called before any access to the database can happen.
    """
    db = shared_connection()
    await db.connect()

    await init_db(db)


async def close_connection_pool() -> None:
    """Close the DB connection pool.

    This function should be called before the app shuts down to ensure all data
    has been flushed to the database.
    """
    db = shared_connection()

    # Run automatic ANALYZE prior to closing the db,
    # see https://sqlite.com/lang_analyze.html.
    await db.execute("PRAGMA analysis_limit=400")
    await db.execute("PRAGMA optimize")

    await db.disconnect()


def shared_connection() -> Database:
    global _shared_connection

    if _shared_connection is None:
        uri = f"sqlite:///{config.storage_path}"
        _shared_connection = Database(uri)

    return _shared_connection


async def init_db(db):
    sql = Path(__file__).with_name("init.sql").read_text()
    async with db.transaction():
        for stmt in sql.split(";;"):
            await db.execute(query=stmt)


async def add(item):
    values = asplain(item)
    keys = ", ".join(f"{k}" for k in values)
    placeholders = ", ".join(f":{k}" for k in values)
    query = f"INSERT INTO {item._table} ({keys}) VALUES ({placeholders})"
    await shared_connection().execute(query=query, values=values)


ModelType = TypeVar("ModelType")


async def get(model: Type[ModelType], **kwds) -> Optional[ModelType]:
    fields_ = ", ".join(f.name for f in fields(model))
    cond = " AND ".join(f"{k}=:{k}" for k in kwds)
    query = f"SELECT {fields_} FROM {model._table} WHERE {cond}"
    row = await shared_connection().fetch_one(query=query, values=kwds)
    return fromplain(model, row) if row else None


async def update(item):
    values = asplain(item)
    keys = ", ".join(f"{k}=:{k}" for k in values if k != "id")
    query = f"UPDATE {item._table} SET {keys} WHERE id=:id"
    await shared_connection().execute(query=query, values=values)


async def add_or_update_user(user: User):
    db_user = await get(User, imdb_id=user.imdb_id)
    if not db_user:
        await add(user)
    else:
        user.id = db_user.id

        if user != db_user:
            await update(user)


async def add_or_update_movie(movie: Movie):
    db_movie = await get(Movie, imdb_id=movie.imdb_id)
    if not db_movie:
        await add(movie)
    else:
        movie.id = db_movie.id
        movie.updated = db_movie.updated

        if movie != db_movie:
            movie.updated = utcnow()
            await update(movie)


async def add_or_update_rating(rating: Rating) -> bool:
    db_rating = await get(
        Rating, movie_id=str(rating.movie_id), user_id=str(rating.user_id)
    )

    if not db_rating:
        await add(rating)
        return True

    else:
        rating.id = db_rating.id

        if rating != db_rating:
            await update(rating)
            return True

    return False


def sql_escape(s: str, char="#"):
    return s.replace(char, 2 * char).replace("%", f"{char}%").replace("_", f"{char}_")


async def find_ratings(
    *,
    imdb_movie_id: str = None,
    title: str = None,
    media_type: str = None,
    ignore_tv_episodes: bool = False,
    limit_rows=10,
):
    values = {
        "limit_rows": limit_rows,
    }

    conditions = []
    if title:
        values["escape"] = "#"
        escaped_title = sql_escape(title, char=values["escape"])
        values["pattern"] = "%" + "%".join(escaped_title.split()) + "%"
        conditions.append("movies.title LIKE :pattern ESCAPE :escape")

    if media_type:
        values["media_type"] = media_type
        conditions.append("movies.media_type=:media_type")

    if ignore_tv_episodes:
        conditions.append("movies.media_type!='TV Episode'")

    query = f"""
    WITH newest_movies
    AS (
        SELECT DISTINCT ratings.movie_id
        FROM ratings
        LEFT JOIN movies ON movies.id=ratings.movie_id
        {('WHERE ' + ' AND '.join(conditions)) if conditions else ''}
        ORDER BY length(movies.title) ASC, ratings.rating_date DESC
        LIMIT :limit_rows
    )

    SELECT
        users.name AS user_name,
        ratings.score AS user_score,
        movies.score AS imdb_score,
        movies.imdb_id AS movie_imdb_id,
        movies.media_type AS media_type,
        movies.title AS movie_title,
        movies.release_year AS release_year
    FROM newest_movies
    LEFT JOIN ratings ON ratings.movie_id=newest_movies.movie_id
    LEFT JOIN users ON users.id=ratings.user_id
    LEFT JOIN movies ON movies.id=ratings.movie_id
    """

    rows = await shared_connection().fetch_all(query=query, values=values)
    return tuple(dict(r) for r in rows)

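The add/get/update helpers derive their SQL from the dataclass fields, so the same three functions cover users, movies, and ratings. A usage sketch with made-up values, assuming the data/ directory and a config.toml exist (unwind.config loads the file at import time) so the pool can open:

import asyncio

from unwind.db import add, close_connection_pool, get, open_connection_pool
from unwind.models import User

async def demo():
    await open_connection_pool()       # also runs init.sql
    await add(User(imdb_id="ur0000001", name="alice"))  # hypothetical values
    user = await get(User, imdb_id="ur0000001")
    print(user)                        # reconstructed via fromplain()
    await close_connection_pool()

asyncio.run(demo())
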
unwind/imdb.py (new file, 132 lines)
@@ -0,0 +1,132 @@
import logging
import re
from collections import namedtuple
from datetime import datetime
from typing import Optional
from urllib.parse import urljoin

from .db import add_or_update_movie, add_or_update_rating, add_or_update_user
from .models import Movie, Rating, User, asplain, fromplain
from .request import soup_from_url

log = logging.getLogger(__name__)

# div#ratings-container
#   div.lister-item.mode-detail
#     div.lister-item-content
#       h3.lister-item-header
#         a
#           [href]
#           .text
#         span.lister-item-year.text
#         br
#         a
#           [href]
#           .text
#         span.lister-item-year.text
#       span.runtime.text
#       span.genre.text
#       div.ipl-rating-widget
#         div.ipl-rating-star.small
#           span.ipl-rating-star__rating.text
#         div.ipl-rating-star.ipl-rating-star--other-user.small
#           span.ipl-rating-star__rating.text
#       p.text-muted.text ("Rated on 06 May 2021")


def imdb_url(user_id):
    return f"https://www.imdb.com/user/{user_id}/ratings"


find_name = re.compile(r"(?P<name>.*)'s Ratings").fullmatch
find_rating_date = re.compile(r"Rated on (?P<date>\d{2} \w+ \d{4})").fullmatch
find_runtime = re.compile(r"((?P<h>\d+) hr)? ?((?P<m>\d+) min)?").fullmatch
# find_year = re.compile(
#     r"(\([IVX]+\) )?\((?P<year>\d{4})(–( |\d{4})| TV (Special|Movie)| Video)?\)"
# ).fullmatch
find_year = re.compile(
    r"(\([IVX]+\) )?\((?P<year>\d{4})(–( |\d{4})| (?P<type>[^)]+))?\)"
).fullmatch
find_movie_id = re.compile(r"/title/(?P<id>tt\d+)/").search


async def parse_page(url, stop_on_dupe=True) -> Optional[str]:
    soup = soup_from_url(url)

    user = User(imdb_id=soup.find("meta", property="pageId")["content"], name="")
    if match := find_name(soup.h1.string):
        user.name = match["name"]
    await add_or_update_user(user)

    items = soup.find_all("div", "lister-item-content")
    for i, item in enumerate(items):

        movie = Movie(
            title=item.h3.a.string.strip(),
            genres=set(s.strip() for s in item.find("span", "genre").string.split(",")),
        )

        episode_br = item.h3.br
        if episode_br:
            episode_a = episode_br.find_next("a")
            if not episode_a:
                log.error("Unknown document structure.")
                continue

            movie.media_type = "TV Episode"
            movie.title += " / " + episode_a.string.strip()
            if match := find_year(
                episode_br.find_next("span", "lister-item-year").string
            ):
                movie.release_year = int(match["year"])
            if match := find_movie_id(episode_a["href"]):
                movie.imdb_id = match["id"]

        rating = Rating(user_id=user.id)

        if (tag := item.find("span", "runtime")) and (
            match := find_runtime(tag.string)
        ):
            movie.runtime = int(match["h"] or 0) * 60 + int(match["m"] or 0)

        if not episode_br:
            if match := find_year(item.h3.find("span", "lister-item-year").string):
                if media_type := match["type"]:
                    movie.media_type = media_type.strip()
                movie.release_year = int(match["year"])
            if match := find_movie_id(item.h3.a["href"]):
                movie.imdb_id = match["id"]

        ratings_item = item.find("div", "ipl-rating-widget")
        if match := find_rating_date(ratings_item.find_next("p", "text-muted").string):
            rating.rating_date = datetime.strptime(match["date"], "%d %b %Y")
        for rating_item in ratings_item.find_all("span", "ipl-rating-star__rating")[:2]:
            if "ipl-rating-star--other-user" in rating_item.parent["class"]:
                rating.score = int(float(rating_item.string) * 10)
            else:
                movie.score = int(float(rating_item.string) * 10)

        if not movie.media_type:
            movie.media_type = "Movie"

        await add_or_update_movie(movie)

        rating.movie_id = movie.id  # needs to be set _after_ movie has been updated
        is_updated = await add_or_update_rating(rating)

        if stop_on_dupe and not is_updated:
            log.info("Import stopped after %s items. Caught up to known state. ✋", i)
            return None

    next_url = urljoin(
        url, soup.find("div", "footer").find(string=re.compile(r"Next")).parent["href"]
    )

    return next_url if url != next_url else None


async def load_imdb(user_id):
    next_url = imdb_url(user_id)

    while next_url := await parse_page(next_url):
        pass

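`find_year` pulls double duty: it extracts the release year and, from the same parenthesised suffix, whatever media type IMDb appends to a list entry. A quick sanity sketch against made-up samples in that format:

import re

find_year = re.compile(
    r"(\([IVX]+\) )?\((?P<year>\d{4})(–( |\d{4})| (?P<type>[^)]+))?\)"
).fullmatch

assert find_year("(2019)")["type"] is None                 # plain movie year
assert find_year("(2011 TV Movie)")["type"] == "TV Movie"  # typed entry
assert find_year("(II) (2020)")["year"] == "2020"          # disambiguated title
assert find_year("(2005–2013)") is not None                # series year range
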
unwind/init.sql (new file, 36 lines)
@@ -0,0 +1,36 @@
PRAGMA foreign_keys = ON;;

CREATE TABLE IF NOT EXISTS users (
    id TEXT NOT NULL PRIMARY KEY,
    imdb_id TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL
);;

CREATE TABLE IF NOT EXISTS movies (
    id TEXT NOT NULL PRIMARY KEY,
    title TEXT NOT NULL,
    release_year NUMBER NOT NULL,
    media_type TEXT NOT NULL,
    imdb_id TEXT NOT NULL UNIQUE,
    score NUMBER NOT NULL,
    runtime NUMBER,
    genres TEXT NOT NULL,
    updated TEXT NOT NULL
);;

CREATE TABLE IF NOT EXISTS ratings (
    id TEXT NOT NULL PRIMARY KEY,
    movie_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    score NUMBER NOT NULL,
    rating_date TEXT NOT NULL,
    favorite NUMBER,
    finished NUMBER,
    FOREIGN KEY(movie_id) REFERENCES movies(id),
    FOREIGN KEY(user_id) REFERENCES users(id)
);;

CREATE UNIQUE INDEX IF NOT EXISTS ratings_index ON ratings (
    movie_id,
    user_id
);;

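The doubled `;;` terminators are not SQLite syntax: `init_db` in unwind/db.py splits this file on `;;` and feeds each chunk to `Database.execute`, which runs one statement per call. A minimal illustration of that split:

sql = "PRAGMA foreign_keys = ON;;\n\nCREATE TABLE IF NOT EXISTS t (x);;"
# init_db does: for stmt in sql.split(";;"): await db.execute(query=stmt)
print(sql.split(";;"))
# ['PRAGMA foreign_keys = ON', '\n\nCREATE TABLE IF NOT EXISTS t (x)', '']
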
unwind/models.py (new file, 139 lines)
@@ -0,0 +1,139 @@
import json
from dataclasses import asdict, dataclass, field, fields, is_dataclass
from datetime import datetime, timezone
from typing import Any, ClassVar, Optional, Type, Union, get_args, get_origin

from .types import ULID


def is_optional(tp: Type):
    if get_origin(tp) is not Union:
        return False

    args = get_args(tp)
    return len(args) == 2 and type(None) in args


def optional_type(tp: Type):
    if get_origin(tp) is not Union:
        return None

    args = get_args(tp)
    if len(args) != 2 or args[1] is not type(None):
        return None

    return args[0]


def asplain(o) -> dict[str, Any]:
    validate(o)

    d = asdict(o)
    for f in fields(o):

        target = f.type
        # XXX this doesn't properly support any kind of nested types
        if (otype := optional_type(f.type)) is not None:
            target = otype
        if (otype := get_origin(target)) is not None:
            target = otype

        v = d[f.name]
        if target is ULID:
            d[f.name] = str(v)
        elif target in {datetime}:
            d[f.name] = v.isoformat()
        elif target in {set}:
            d[f.name] = json.dumps(list(sorted(v)))
        elif target in {list}:
            d[f.name] = json.dumps(list(v))
        elif target in {bool, str, int, float, None}:
            pass
        else:
            raise ValueError(f"Unsupported value type: {f.name}: {type(v)}")

    return d


def fromplain(cls, d: dict[str, Any]):
    # if not is_dataclass(cls):
    #     raise TypeError(f'Not a dataclass: {type(cls)}')

    dd = {}
    for f in fields(cls):

        target = f.type
        otype = optional_type(f.type)
        is_opt = otype is not None
        if is_opt:
            target = otype
        if (xtype := get_origin(target)) is not None:
            target = xtype

        v = d[f.name]
        if is_opt and v is None:
            dd[f.name] = v
        elif isinstance(v, target):
            dd[f.name] = v
        elif target in {set, list}:
            dd[f.name] = target(json.loads(v))
        elif target in {datetime}:
            dd[f.name] = target.fromisoformat(v)
        else:
            dd[f.name] = target(v)

    o = cls(**dd)
    validate(o)
    return o


def validate(o):
    for f in fields(o):
        vtype = type(getattr(o, f.name))
        if vtype is not f.type:
            if get_origin(f.type) is vtype or (
                get_origin(f.type) is Union and vtype in get_args(f.type)
            ):
                continue
            raise ValueError(f"Invalid value type: {f.name}: {vtype}")


def utcnow():
    # Take the current time as an aware UTC datetime. (Previously this used
    # datetime.now().replace(tzinfo=timezone.utc), which stamps *local* time
    # as UTC on machines not running in UTC.)
    return datetime.now(timezone.utc)


@dataclass
class Movie:
    _table: ClassVar[str] = "movies"

    id: ULID = field(default_factory=ULID)
    title: str = None  # canonical title
    release_year: int = None  # canonical release date
    media_type: Optional[str] = None
    imdb_id: str = None
    score: int = None  # range: [0,100]
    runtime: Optional[int] = None  # minutes
    genres: set[str] = None
    updated: datetime = field(default_factory=utcnow)


@dataclass
class Rating:
    _table: ClassVar[str] = "ratings"

    id: ULID = field(default_factory=ULID)
    movie_id: ULID = None
    user_id: ULID = None
    score: int = None  # range: [0,100]
    rating_date: datetime = None
    favorite: Optional[bool] = None
    finished: Optional[bool] = None


@dataclass
class User:
    _table: ClassVar[str] = "users"

    id: ULID = field(default_factory=ULID)
    imdb_id: str = None
    name: str = None  # canonical user name

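asplain/fromplain translate between the dataclasses and flat SQLite rows: ULIDs and datetimes become strings, sets become JSON arrays, and back. A round-trip sketch with made-up values:

from unwind.models import Movie, asplain, fromplain

movie = Movie(
    title="Example Movie",    # hypothetical sample data
    release_year=2020,
    media_type="Movie",
    imdb_id="tt0000001",
    score=73,
    genres={"Drama", "Sci-Fi"},
)
row = asplain(movie)
# row["id"] is a 26-char ULID string, row["genres"] a JSON array,
# row["updated"] an ISO-8601 timestamp
same = fromplain(Movie, row)
assert same.genres == movie.genres and str(same.id) == str(movie.id)
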
unwind/request.py (new file, 185 lines)
@@ -0,0 +1,185 @@
import json
import logging
from collections import deque
from contextlib import contextmanager
from dataclasses import dataclass
from functools import wraps
from hashlib import md5
from random import random
from time import sleep, time
from typing import Callable, Deque  # Deque added: it is used in throttle()'s annotation

import bs4
import requests
from urllib3.util.retry import Retry

from . import config

log = logging.getLogger(__name__)

if config.debug and config.cachedir:
    config.cachedir.mkdir(exist_ok=True)


def set_retries(s: requests.Session, n: int, backoff_factor: float = 0.2):
    retry = (
        Retry(
            total=n,
            connect=n,
            read=n,
            status=n,
            status_forcelist=Retry.RETRY_AFTER_STATUS_CODES,
            backoff_factor=backoff_factor,
        )
        if n
        else Retry(0, read=False)
    )
    for a in s.adapters.values():
        a.max_retries = retry


_shared_session = None


@contextmanager
def session():
    global _shared_session

    if _shared_session:
        yield _shared_session
        return

    _shared_session = Session()
    try:
        yield _shared_session
    finally:
        _shared_session = None


def Session() -> requests.Session:
    s = requests.Session()
    s.headers["User-Agent"] = "Mozilla/5.0 Gecko/20100101 unwind/20210506"
    return s


def throttle(
    times: int, per_seconds: float, jitter: Callable[[], float] = None
) -> Callable[[Callable], Callable]:

    calls: Deque[float] = deque(maxlen=times)

    if jitter is None:
        jitter = lambda: 0.0

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def inner(*args, **kwds):

            # clean up
            while calls:
                if calls[0] + per_seconds > time():
                    break
                calls.popleft()

            # wait
            if len(calls) == calls.maxlen:
                wait_until = calls.popleft() + per_seconds + jitter()
                timeout = wait_until - time()
                log.debug(f"waiting {timeout:.2} seconds ... ⏳")
                sleep(timeout)

            # call
            calls.append(time())
            try:
                r = func(*args, **kwds)
            except Exception as e:
                if getattr(e, "is_cached", False):
                    calls.pop()
                raise
            if getattr(r, "is_cached", False):
                calls.pop()

            return r

        return inner

    return decorator


class CachedStr(str):
    is_cached = True


@dataclass
class CachedResponse:
    is_cached = True
    status_code: int
    text: str
    url: str
    headers: dict[str, str] = None

    def json(self):
        return json.loads(self.text)


class RedirectError(RuntimeError):
    def __init__(self, from_url: str, to_url: str, is_cached=False):
        self.from_url = from_url
        self.to_url = to_url
        self.is_cached = is_cached
        super().__init__(f"Redirected: {from_url} -> {to_url}")


@throttle(1, 1, random)
def http_get(s: requests.Session, url: str, *args, **kwds) -> requests.Response:

    req = s.prepare_request(requests.Request("GET", url, *args, **kwds))

    if config.debug and config.cachedir:
        sig = repr(req.url)  # + repr(sorted(req.headers.items()))
        cachefile = config.cachedir / md5(sig.encode()).hexdigest()
    else:
        cachefile = None

    if cachefile:
        if cachefile.exists():
            log.debug(
                f"loading {req.url} ({req.headers!r}) from cache {cachefile} ... 💾"
            )
            with cachefile.open() as fp:
                resp = CachedResponse(**json.load(fp))
            if 300 <= resp.status_code <= 399:
                raise RedirectError(
                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
                )
            return resp

    log.debug(f"loading {req.url} ({req.headers!r}) ... ⚡️")
    resp = s.send(req, allow_redirects=False, stream=True)
    resp.raise_for_status()

    if cachefile:
        with cachefile.open("w") as fp:
            json.dump(
                {
                    "status_code": resp.status_code,
                    "text": resp.text,
                    "url": resp.url,
                    "headers": dict(resp.headers),
                },
                fp,
            )

    if resp.is_redirect:
        # Redirects could mean trouble, we need to stay on top of that!
        raise RedirectError(from_url=resp.url, to_url=resp.headers["location"])

    return resp


def soup_from_url(url):
    with session() as s:
        r = http_get(s, url)

    soup = bs4.BeautifulSoup(r.text, "html5lib")
    return soup

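throttle is a sliding-window rate limiter: it keeps the last `times` call timestamps and sleeps until the oldest leaves the `per_seconds` window; calls that turn out to be cache hits are refunded via the is_cached flag. A standalone usage sketch (note that importing unwind.request pulls in unwind.config, which reads data/config.toml at import time):

from time import time

from unwind.request import throttle

@throttle(2, 1.0)              # at most 2 calls per rolling second
def ping(i):
    print(f"call {i} at {time():.2f}")

for i in range(5):
    ping(i)                    # the later calls each wait for a free slot
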
unwind/types.py (new file, 31 lines)
@@ -0,0 +1,31 @@
import re
from typing import Union, cast

import ulid
from ulid.hints import Buffer


class ULID(ulid.ULID):
    """Extended ULID type.

    Same as ulid.ULID, but allows initializing without a buffer, to make
    it easier to use the class as a standard factory.

    For more information about ULIDs, see https://github.com/ulid/spec.
    """

    _pattern = re.compile(r"^[0-9A-HJKMNP-TV-Z]{26}$")

    def __init__(self, buffer: Union[Buffer, ulid.ULID, str, None] = None):
        if isinstance(buffer, str):
            if not self._pattern.search(buffer):
                raise ValueError("Invalid ULID.")
            buffer = ulid.from_str(buffer)
            assert isinstance(buffer, ulid.ULID)

        if isinstance(buffer, ulid.ULID):
            buffer = cast(memoryview, buffer.memory)
        elif buffer is None:
            buffer = cast(memoryview, ulid.new().memory)

        super().__init__(buffer)

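The subclass keeps ulid.ULID's value semantics but adds a zero-argument constructor (fresh id) and a validating string constructor, which is what lets the models use it directly as a field default_factory. Usage sketch:

from unwind.types import ULID

fresh = ULID()                  # new, time-ordered id
again = ULID(str(fresh))        # round-trips the 26-char string form
assert str(again) == str(fresh)

try:
    ULID("not-a-ulid")          # rejected by _pattern
except ValueError as e:
    print(e)
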
unwind/web.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from collections import defaultdict

from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route

from . import config
from .db import close_connection_pool, find_ratings, open_connection_pool


def imdb_url(imdb_id: str):
    return f"https://www.imdb.com/title/{imdb_id}/"


def truthy(s: str):
    return bool(s) and s.lower() in {"1", "yes", "true"}


async def ratings(request):
    title = request.query_params.get("title")
    media_type = request.query_params.get("media_type")
    ignore_tv_episodes = truthy(request.query_params.get("ignore_tv_episodes"))
    rows = await find_ratings(
        title=title, media_type=media_type, ignore_tv_episodes=ignore_tv_episodes
    )

    aggr = {}
    for r in rows:
        mov = aggr.setdefault(
            r["movie_imdb_id"],
            {
                "title": r["movie_title"],
                "year": r["release_year"],
                "link": imdb_url(r["movie_imdb_id"]),
                "user_scores": [],
                "imdb_score": r["imdb_score"],
                "media_type": r["media_type"],
            },
        )
        mov["user_scores"].append(r["user_score"])

    resp = tuple(aggr.values())

    return JSONResponse(resp)


app = Starlette(
    on_startup=[open_connection_pool],
    on_shutdown=[close_connection_pool],
    routes=[
        Route("/ratings", ratings),
    ],
)