init with some kind of working prototype

ducklet 2021-06-15 19:09:21 +02:00
commit b5cb22822e
22 changed files with 1292 additions and 0 deletions

4
.gitignore vendored Normal file

@@ -0,0 +1,4 @@
*.pyc
/.cache
/data/*
/requirements.txt

19
Dockerfile Normal file

@@ -0,0 +1,19 @@
FROM docker.io/library/python:3.9-alpine

RUN apk update --no-cache \
    && apk upgrade --no-cache \
    && pip install --no-cache-dir --upgrade pip

RUN addgroup -g 10001 py \
    && adduser -D -u 10000 -G py py

WORKDIR /var/app

COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade --requirement requirements.txt

USER 10000:10001

ENTRYPOINT ["/var/app/run"]
CMD ["server"]

6
README.md Normal file

@@ -0,0 +1,6 @@
# Unwind

A cache & aggregator for user ratings from IMDb and other sources.

User ratings are spread across many platforms, and most platforms allow only very limited access to that data. This service aims to offer a single interface for all of it and to open up access.
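Once the importer has filled the database and the server is running, ratings can be queried over HTTP. A quick sketch (host and port assume uvicorn's defaults; the query values are made up):

import requests

# Query the aggregated ratings endpoint defined in unwind/web.py.
resp = requests.get(
    "http://127.0.0.1:8000/ratings",
    params={"title": "alien", "ignore_tv_episodes": "yes"},
)
for movie in resp.json():
    print(movie["title"], movie["year"], movie["imdb_score"], movie["user_scores"])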

378
poetry.lock generated Normal file

@@ -0,0 +1,378 @@
[[package]]
name = "aiosqlite"
version = "0.17.0"
description = "asyncio bridge to the standard sqlite3 module"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
typing_extensions = ">=3.7.2"
[[package]]
name = "asgiref"
version = "3.3.4"
description = "ASGI specs, helper code, and adapters"
category = "main"
optional = false
python-versions = ">=3.6"
[package.extras]
tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"]
[[package]]
name = "beautifulsoup4"
version = "4.9.3"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
soupsieve = {version = ">1.2", markers = "python_version >= \"3.0\""}
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "certifi"
version = "2020.12.5"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "chardet"
version = "4.0.0"
description = "Universal encoding detector for Python 2 and 3"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "click"
version = "8.0.1"
description = "Composable command line interface toolkit"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "databases"
version = "0.4.3"
description = "Async database support for Python."
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
aiosqlite = {version = "*", optional = true, markers = "extra == \"sqlite\""}
sqlalchemy = "<1.4"
[package.extras]
mysql = ["aiomysql"]
postgresql = ["asyncpg"]
postgresql_aiopg = ["aiopg"]
sqlite = ["aiosqlite"]
[[package]]
name = "h11"
version = "0.12.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "html5lib"
version = "1.1"
description = "HTML parser based on the WHATWG HTML specification"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.dependencies]
six = ">=1.9"
webencodings = "*"
[package.extras]
all = ["genshi", "chardet (>=2.2)", "lxml"]
chardet = ["chardet (>=2.2)"]
genshi = ["genshi"]
lxml = ["lxml"]
[[package]]
name = "idna"
version = "2.10"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "requests"
version = "2.25.1"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.dependencies]
certifi = ">=2017.4.17"
chardet = ">=3.0.2,<5"
idna = ">=2.5,<3"
urllib3 = ">=1.21.1,<1.27"
[package.extras]
security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "soupsieve"
version = "2.2.1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "sqlalchemy"
version = "1.3.24"
description = "Database Abstraction Library"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[package.extras]
mssql = ["pyodbc"]
mssql_pymssql = ["pymssql"]
mssql_pyodbc = ["pyodbc"]
mysql = ["mysqlclient"]
oracle = ["cx-oracle"]
postgresql = ["psycopg2"]
postgresql_pg8000 = ["pg8000 (<1.16.6)"]
postgresql_psycopg2binary = ["psycopg2-binary"]
postgresql_psycopg2cffi = ["psycopg2cffi"]
pymysql = ["pymysql (<1)", "pymysql"]
[[package]]
name = "starlette"
version = "0.14.2"
description = "The little ASGI library that shines."
category = "main"
optional = false
python-versions = ">=3.6"
[package.extras]
full = ["aiofiles", "graphene", "itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests"]
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "main"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "typing-extensions"
version = "3.10.0.0"
description = "Backported and Experimental Type Hints for Python 3.5+"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "ulid-py"
version = "1.1.0"
description = "Universally Unique Lexicographically Sortable Identifier"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "urllib3"
version = "1.26.4"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
[package.extras]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
brotli = ["brotlipy (>=0.6.0)"]
[[package]]
name = "uvicorn"
version = "0.14.0"
description = "The lightning-fast ASGI server."
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
asgiref = ">=3.3.4"
click = ">=7"
h11 = ">=0.8"
[package.extras]
standard = ["websockets (>=9.1)", "httptools (>=0.2.0,<0.3.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"]
[[package]]
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
category = "main"
optional = false
python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "28c14ec611e61db259fa6aa160df99308f7452874f69377a634d07cd379603c8"
[metadata.files]
aiosqlite = [
{file = "aiosqlite-0.17.0-py3-none-any.whl", hash = "sha256:6c49dc6d3405929b1d08eeccc72306d3677503cc5e5e43771efc1e00232e8231"},
{file = "aiosqlite-0.17.0.tar.gz", hash = "sha256:f0e6acc24bc4864149267ac82fb46dfb3be4455f99fe21df82609cc6e6baee51"},
]
asgiref = [
{file = "asgiref-3.3.4-py3-none-any.whl", hash = "sha256:92906c611ce6c967347bbfea733f13d6313901d54dcca88195eaeb52b2a8e8ee"},
{file = "asgiref-3.3.4.tar.gz", hash = "sha256:d1216dfbdfb63826470995d31caed36225dcaf34f182e0fa257a4dd9e86f1b78"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"},
{file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"},
{file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"},
]
certifi = [
{file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"},
{file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"},
]
chardet = [
{file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
{file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
]
click = [
{file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"},
{file = "click-8.0.1.tar.gz", hash = "sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
databases = [
{file = "databases-0.4.3-py3-none-any.whl", hash = "sha256:f82b02c28fdddf7ffe7ee1945f5abef44d687ba97b9a1c81492c7f035d4c90e6"},
{file = "databases-0.4.3.tar.gz", hash = "sha256:1521db7f6d3c581ff81b3552e130b27a13aefea2a57295e65738081831137afc"},
]
h11 = [
{file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"},
{file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"},
]
html5lib = [
{file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"},
{file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"},
]
idna = [
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
]
requests = [
{file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"},
{file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
soupsieve = [
{file = "soupsieve-2.2.1-py3-none-any.whl", hash = "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"},
{file = "soupsieve-2.2.1.tar.gz", hash = "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc"},
]
sqlalchemy = [
{file = "SQLAlchemy-1.3.24-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:87a2725ad7d41cd7376373c15fd8bf674e9c33ca56d0b8036add2d634dba372e"},
{file = "SQLAlchemy-1.3.24-cp27-cp27m-win32.whl", hash = "sha256:f597a243b8550a3a0b15122b14e49d8a7e622ba1c9d29776af741f1845478d79"},
{file = "SQLAlchemy-1.3.24-cp27-cp27m-win_amd64.whl", hash = "sha256:fc4cddb0b474b12ed7bdce6be1b9edc65352e8ce66bc10ff8cbbfb3d4047dbf4"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:f1149d6e5c49d069163e58a3196865e4321bad1803d7886e07d8710de392c548"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:14f0eb5db872c231b20c18b1e5806352723a3a89fb4254af3b3e14f22eaaec75"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:e98d09f487267f1e8d1179bf3b9d7709b30a916491997137dd24d6ae44d18d79"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:fc1f2a5a5963e2e73bac4926bdaf7790c4d7d77e8fc0590817880e22dd9d0b8b"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-win32.whl", hash = "sha256:f3c5c52f7cb8b84bfaaf22d82cb9e6e9a8297f7c2ed14d806a0f5e4d22e83fb7"},
{file = "SQLAlchemy-1.3.24-cp35-cp35m-win_amd64.whl", hash = "sha256:0352db1befcbed2f9282e72843f1963860bf0e0472a4fa5cf8ee084318e0e6ab"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:2ed6343b625b16bcb63c5b10523fd15ed8934e1ed0f772c534985e9f5e73d894"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:34fcec18f6e4b24b4a5f6185205a04f1eab1e56f8f1d028a2a03694ebcc2ddd4"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:e47e257ba5934550d7235665eee6c911dc7178419b614ba9e1fbb1ce6325b14f"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:816de75418ea0953b5eb7b8a74933ee5a46719491cd2b16f718afc4b291a9658"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-win32.whl", hash = "sha256:26155ea7a243cbf23287f390dba13d7927ffa1586d3208e0e8d615d0c506f996"},
{file = "SQLAlchemy-1.3.24-cp36-cp36m-win_amd64.whl", hash = "sha256:f03bd97650d2e42710fbe4cf8a59fae657f191df851fc9fc683ecef10746a375"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:a006d05d9aa052657ee3e4dc92544faae5fcbaafc6128217310945610d862d39"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1e2f89d2e5e3c7a88e25a3b0e43626dba8db2aa700253023b82e630d12b37109"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:0d5d862b1cfbec5028ce1ecac06a3b42bc7703eb80e4b53fceb2738724311443"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:0172423a27fbcae3751ef016663b72e1a516777de324a76e30efa170dbd3dd2d"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-win32.whl", hash = "sha256:d37843fb8df90376e9e91336724d78a32b988d3d20ab6656da4eb8ee3a45b63c"},
{file = "SQLAlchemy-1.3.24-cp37-cp37m-win_amd64.whl", hash = "sha256:c10ff6112d119f82b1618b6dc28126798481b9355d8748b64b9b55051eb4f01b"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:861e459b0e97673af6cc5e7f597035c2e3acdfb2608132665406cded25ba64c7"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5de2464c254380d8a6c20a2746614d5a436260be1507491442cf1088e59430d2"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d375d8ccd3cebae8d90270f7aa8532fe05908f79e78ae489068f3b4eee5994e8"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:014ea143572fee1c18322b7908140ad23b3994036ef4c0d630110faf942652f8"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-win32.whl", hash = "sha256:6607ae6cd3a07f8a4c3198ffbf256c261661965742e2b5265a77cd5c679c9bba"},
{file = "SQLAlchemy-1.3.24-cp38-cp38-win_amd64.whl", hash = "sha256:fcb251305fa24a490b6a9ee2180e5f8252915fb778d3dafc70f9cc3f863827b9"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01aa5f803db724447c1d423ed583e42bf5264c597fd55e4add4301f163b0be48"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4d0e3515ef98aa4f0dc289ff2eebb0ece6260bbf37c2ea2022aad63797eacf60"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:bce28277f308db43a6b4965734366f533b3ff009571ec7ffa583cb77539b84d6"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:8110e6c414d3efc574543109ee618fe2c1f96fa31833a1ff36cc34e968c4f233"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-win32.whl", hash = "sha256:ee5f5188edb20a29c1cc4a039b074fdc5575337c9a68f3063449ab47757bb064"},
{file = "SQLAlchemy-1.3.24-cp39-cp39-win_amd64.whl", hash = "sha256:09083c2487ca3c0865dc588e07aeaa25416da3d95f7482c07e92f47e080aa17b"},
{file = "SQLAlchemy-1.3.24.tar.gz", hash = "sha256:ebbb777cbf9312359b897bf81ba00dae0f5cb69fba2a18265dcc18a6f5ef7519"},
]
starlette = [
{file = "starlette-0.14.2-py3-none-any.whl", hash = "sha256:3c8e48e52736b3161e34c9f0e8153b4f32ec5d8995a3ee1d59410d92f75162ed"},
{file = "starlette-0.14.2.tar.gz", hash = "sha256:7d49f4a27f8742262ef1470608c59ddbc66baf37c148e938c7038e6bc7a998aa"},
]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
typing-extensions = [
{file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"},
{file = "typing_extensions-3.10.0.0-py3-none-any.whl", hash = "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"},
{file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
]
ulid-py = [
{file = "ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0"},
{file = "ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987"},
]
urllib3 = [
{file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"},
{file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"},
]
uvicorn = [
{file = "uvicorn-0.14.0-py3-none-any.whl", hash = "sha256:2a76bb359171a504b3d1c853409af3adbfa5cef374a4a59e5881945a97a93eae"},
{file = "uvicorn-0.14.0.tar.gz", hash = "sha256:45ad7dfaaa7d55cab4cd1e85e03f27e9d60bc067ddc59db52a2b0aeca8870292"},
]
webencodings = [
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
]

23
pyproject.toml Normal file

@@ -0,0 +1,23 @@
[tool.poetry]
name = "unwind"
version = "0.1.0"
description = ""
authors = ["ducklet <ducklet@noreply.code.dumpr.org>"]
license = "LOL"

[tool.poetry.dependencies]
python = "^3.9"
requests = "^2.25.1"
beautifulsoup4 = "^4.9.3"
html5lib = "^1.1"
starlette = "^0.14.2"
ulid-py = "^1.1.0"
databases = {extras = ["sqlite"], version = "^0.4.3"}
toml = "^0.10.2"
uvicorn = "^0.14.0"

[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

5
pyrightconfig.json Normal file

@@ -0,0 +1,5 @@
{
"stubPath": "./stubs",
"venvPath": ".",
"venv": ".venv"
}

15
run Executable file

@@ -0,0 +1,15 @@
#!/bin/sh -eu
# Usage: ./run <task> [args...]; dispatches to the matching script in
# ./scripts, e.g. `./run server` or `./run app`.

RUN_BIN=$(realpath "$0")
RUN_DIR=$(dirname "$RUN_BIN")
export RUN_BIN
export RUN_DIR

task="$1"
shift

# export DEBUG=1
# export UNWIND_LOGLEVEL=DEBUG

exec scripts/"$task" "$@"

5
scripts/app Executable file

@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec python -m unwind "$@"

7
scripts/build Executable file

@@ -0,0 +1,7 @@
#!/bin/sh -eu

cd "$RUN_DIR"

[ -z "${DEBUG:-}" ] || set -x

exec poetry export -o requirements.txt

5
scripts/dev Executable file

@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec uvicorn unwind:web_app --reload

8
scripts/lint Executable file

@@ -0,0 +1,8 @@
#!/bin/sh -eu

cd "$RUN_DIR"

[ -z "${DEBUG:-}" ] || set -x

isort --profile black unwind
black unwind

5
scripts/server Executable file

@@ -0,0 +1,5 @@
#!/bin/sh -eu

[ -z "${DEBUG:-}" ] || set -x

exec uvicorn --host 0.0.0.0 unwind:web_app

1
unwind/__init__.py Normal file

@@ -0,0 +1 @@
from .web import app as web_app

36
unwind/__main__.py Normal file

@@ -0,0 +1,36 @@
import asyncio
import logging

from . import config
from .db import close_connection_pool, open_connection_pool
from .imdb import load_imdb
from .request import session

log = logging.getLogger(__name__)


async def run_import():
    await open_connection_pool()
    with session() as s:
        s.headers["Accept-Language"] = "en-GB, en;q=0.5"
        for name, imdb_id in config.imdb.items():
            log.info("Loading data for %s ... ⚡️", name)
            await load_imdb(imdb_id)
    await close_connection_pool()


def main():
    logging.basicConfig(
        format="%(asctime)s.%(msecs)03d [%(name)s:%(process)d] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
        level=config.loglevel,
    )
    log.debug(f"Log level: {config.loglevel}")
    asyncio.run(run_import())


if __name__ == "__main__":
    main()

14
unwind/config.py Normal file

@@ -0,0 +1,14 @@
import os
from pathlib import Path

import toml

cachedir = (
    Path(cachedir) if (cachedir := os.getenv("UNWIND_CACHEDIR", ".cache")) else None
)
debug = os.getenv("DEBUG") == "1"
loglevel = os.getenv("UNWIND_LOGLEVEL") or ("DEBUG" if debug else "INFO")

storage_path = os.getenv("UNWIND_STORAGE", "./data/db.sqlite")
config_path = os.getenv("UNWIND_CONFIG", "./data/config.toml")

imdb = toml.load(config_path)["imdb"]
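The importer expects the TOML file at `config_path` to contain an `imdb` table mapping display names to IMDb user ids. A hypothetical sketch (the name and id are made up):

import toml

# data/config.toml is expected to look roughly like:
#
#   [imdb]
#   alice = "ur12345678"
#
imdb = toml.load("data/config.toml")["imdb"]
assert imdb == {"alice": "ur12345678"}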

185
unwind/db.py Normal file

@@ -0,0 +1,185 @@
import logging
from dataclasses import fields
from pathlib import Path
from typing import Optional, Type, TypeVar

from databases import Database

from . import config
from .models import Movie, Rating, User, asplain, fromplain, utcnow

log = logging.getLogger(__name__)

_shared_connection: Optional[Database] = None


async def open_connection_pool() -> None:
    """Open the DB connection pool.

    This function needs to be called before any access to the database can happen.
    """
    db = shared_connection()
    await db.connect()
    await init_db(db)


async def close_connection_pool() -> None:
    """Close the DB connection pool.

    This function should be called before the app shuts down to ensure all data
    has been flushed to the database.
    """
    db = shared_connection()
    # Run automatic ANALYZE prior to closing the db,
    # see https://sqlite.com/lang_analyze.html.
    await db.execute("PRAGMA analysis_limit=400")
    await db.execute("PRAGMA optimize")
    await db.disconnect()


def shared_connection() -> Database:
    global _shared_connection
    if _shared_connection is None:
        uri = f"sqlite:///{config.storage_path}"
        _shared_connection = Database(uri)
    return _shared_connection


async def init_db(db):
    # init.sql uses ";;" as a statement separator, since the driver executes
    # one statement at a time.
    sql = Path(__file__).with_name("init.sql").read_text()
    async with db.transaction():
        for stmt in sql.split(";;"):
            await db.execute(query=stmt)


async def add(item):
    values = asplain(item)
    keys = ", ".join(f"{k}" for k in values)
    placeholders = ", ".join(f":{k}" for k in values)
    query = f"INSERT INTO {item._table} ({keys}) VALUES ({placeholders})"
    await shared_connection().execute(query=query, values=values)


ModelType = TypeVar("ModelType")


async def get(model: Type[ModelType], **kwds) -> Optional[ModelType]:
    fields_ = ", ".join(f.name for f in fields(model))
    cond = " AND ".join(f"{k}=:{k}" for k in kwds)
    query = f"SELECT {fields_} FROM {model._table} WHERE {cond}"
    row = await shared_connection().fetch_one(query=query, values=kwds)
    return fromplain(model, row) if row else None


async def update(item):
    values = asplain(item)
    keys = ", ".join(f"{k}=:{k}" for k in values if k != "id")
    query = f"UPDATE {item._table} SET {keys} WHERE id=:id"
    await shared_connection().execute(query=query, values=values)


async def add_or_update_user(user: User):
    db_user = await get(User, imdb_id=user.imdb_id)
    if not db_user:
        await add(user)
    else:
        user.id = db_user.id
        if user != db_user:
            await update(user)


async def add_or_update_movie(movie: Movie):
    db_movie = await get(Movie, imdb_id=movie.imdb_id)
    if not db_movie:
        await add(movie)
    else:
        movie.id = db_movie.id
        movie.updated = db_movie.updated
        if movie != db_movie:
            movie.updated = utcnow()
            await update(movie)


async def add_or_update_rating(rating: Rating) -> bool:
    db_rating = await get(
        Rating, movie_id=str(rating.movie_id), user_id=str(rating.user_id)
    )
    if not db_rating:
        await add(rating)
        return True
    else:
        rating.id = db_rating.id
        if rating != db_rating:
            await update(rating)
            return True
    return False


def sql_escape(s: str, char="#"):
    return s.replace(char, 2 * char).replace("%", f"{char}%").replace("_", f"{char}_")


async def find_ratings(
    *,
    imdb_movie_id: Optional[str] = None,
    title: Optional[str] = None,
    media_type: Optional[str] = None,
    ignore_tv_episodes: bool = False,
    limit_rows=10,
):
    values = {
        "limit_rows": limit_rows,
    }
    conditions = []
    if imdb_movie_id:
        values["imdb_movie_id"] = imdb_movie_id
        conditions.append("movies.imdb_id=:imdb_movie_id")
    if title:
        values["escape"] = "#"
        escaped_title = sql_escape(title, char=values["escape"])
        values["pattern"] = "%" + "%".join(escaped_title.split()) + "%"
        conditions.append("movies.title LIKE :pattern ESCAPE :escape")
    if media_type:
        values["media_type"] = media_type
        conditions.append("movies.media_type=:media_type")
    if ignore_tv_episodes:
        conditions.append("movies.media_type!='TV Episode'")
    query = f"""
        WITH newest_movies
        AS (
            SELECT DISTINCT ratings.movie_id
            FROM ratings
            LEFT JOIN movies ON movies.id=ratings.movie_id
            {('WHERE ' + ' AND '.join(conditions)) if conditions else ''}
            ORDER BY length(movies.title) ASC, ratings.rating_date DESC
            LIMIT :limit_rows
        )
        SELECT
            users.name AS user_name,
            ratings.score AS user_score,
            movies.score AS imdb_score,
            movies.imdb_id AS movie_imdb_id,
            movies.media_type AS media_type,
            movies.title AS movie_title,
            movies.release_year AS release_year
        FROM newest_movies
        LEFT JOIN ratings ON ratings.movie_id=newest_movies.movie_id
        LEFT JOIN users ON users.id=ratings.user_id
        LEFT JOIN movies ON movies.id=ratings.movie_id
    """
    rows = await shared_connection().fetch_all(query=query, values=values)
    return tuple(dict(r) for r in rows)
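A minimal sketch of how these helpers compose; `demo` and the user id are hypothetical, and it assumes the package's dependencies plus the data/config.toml it loads at import time are in place:

import asyncio

from unwind.db import add, close_connection_pool, get, open_connection_pool
from unwind.models import User


async def demo():
    await open_connection_pool()
    # add() generates the INSERT from the dataclass fields via asplain().
    await add(User(imdb_id="ur0000000", name="alice"))
    # get() generates SELECT ... WHERE imdb_id=:imdb_id and rebuilds the
    # dataclass via fromplain().
    user = await get(User, imdb_id="ur0000000")
    print(user)
    await close_connection_pool()


asyncio.run(demo())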

132
unwind/imdb.py Normal file

@@ -0,0 +1,132 @@
import logging
import re
from datetime import datetime
from typing import Optional
from urllib.parse import urljoin

from .db import add_or_update_movie, add_or_update_rating, add_or_update_user
from .models import Movie, Rating, User
from .request import soup_from_url

log = logging.getLogger(__name__)

# div#ratings-container
#   div.lister-item.mode-detail
#     div.lister-item-content
#       h3.lister-item-header
#         a
#           [href]
#           .text
#         span.lister-item-year.text
#         br
#         a
#           [href]
#           .text
#         span.lister-item-year.text
#       span.runtime.text
#       span.genre.text
#       div.ipl-rating-widget
#         div.ipl-rating-star.small
#           span.ipl-rating-star__rating.text
#         div.ipl-rating-star.ipl-rating-star--other-user.small
#           span.ipl-rating-star__rating.text
#       p.text-muted.text ("Rated on 06 May 2021")


def imdb_url(user_id):
    return f"https://www.imdb.com/user/{user_id}/ratings"


find_name = re.compile(r"(?P<name>.*)'s Ratings").fullmatch
find_rating_date = re.compile(r"Rated on (?P<date>\d{2} \w+ \d{4})").fullmatch
find_runtime = re.compile(r"((?P<h>\d+) hr)? ?((?P<m>\d+) min)?").fullmatch
# find_year = re.compile(
#     r"(\([IVX]+\) )?\((?P<year>\d{4})(( |\d{4})| TV (Special|Movie)| Video)?\)"
# ).fullmatch
find_year = re.compile(
    r"(\([IVX]+\) )?\((?P<year>\d{4})(( |\d{4})| (?P<type>[^)]+))?\)"
).fullmatch
find_movie_id = re.compile(r"/title/(?P<id>tt\d+)/").search


async def parse_page(url, stop_on_dupe=True) -> Optional[str]:
    soup = soup_from_url(url)
    user = User(imdb_id=soup.find("meta", property="pageId")["content"], name="")
    if match := find_name(soup.h1.string):
        user.name = match["name"]
    await add_or_update_user(user)
    items = soup.find_all("div", "lister-item-content")
    for i, item in enumerate(items):
        movie = Movie(
            title=item.h3.a.string.strip(),
            genres=set(s.strip() for s in item.find("span", "genre").string.split(",")),
        )
        episode_br = item.h3.br
        if episode_br:
            episode_a = episode_br.find_next("a")
            if not episode_a:
                log.error("Unknown document structure.")
                continue
            movie.media_type = "TV Episode"
            movie.title += " / " + episode_a.string.strip()
            if match := find_year(
                episode_br.find_next("span", "lister-item-year").string
            ):
                movie.release_year = int(match["year"])
            if match := find_movie_id(episode_a["href"]):
                movie.imdb_id = match["id"]
        rating = Rating(user_id=user.id)
        if (tag := item.find("span", "runtime")) and (
            match := find_runtime(tag.string)
        ):
            movie.runtime = int(match["h"] or 0) * 60 + int(match["m"] or 0)
        if not episode_br:
            if match := find_year(item.h3.find("span", "lister-item-year").string):
                if media_type := match["type"]:
                    movie.media_type = media_type.strip()
                movie.release_year = int(match["year"])
            if match := find_movie_id(item.h3.a["href"]):
                movie.imdb_id = match["id"]
        ratings_item = item.find("div", "ipl-rating-widget")
        if match := find_rating_date(ratings_item.find_next("p", "text-muted").string):
            rating.rating_date = datetime.strptime(match["date"], "%d %b %Y")
        for rating_item in ratings_item.find_all("span", "ipl-rating-star__rating")[:2]:
            if "ipl-rating-star--other-user" in rating_item.parent["class"]:
                rating.score = int(float(rating_item.string) * 10)
            else:
                movie.score = int(float(rating_item.string) * 10)
        if not movie.media_type:
            movie.media_type = "Movie"
        await add_or_update_movie(movie)
        rating.movie_id = movie.id  # needs to be set _after_ movie has been updated
        is_updated = await add_or_update_rating(rating)
        if stop_on_dupe and not is_updated:
            log.info("Import stopped after %s items. Caught up to known state. ✋", i)
            return None
    next_url = urljoin(
        url, soup.find("div", "footer").find(string=re.compile(r"Next")).parent["href"]
    )
    return next_url if url != next_url else None


async def load_imdb(user_id):
    next_url = imdb_url(user_id)
    while next_url := await parse_page(next_url):
        pass
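For reference, a standalone sketch of what the two trickiest regexes above extract; the sample strings are illustrative, not taken from IMDb:

import re

find_year = re.compile(
    r"(\([IVX]+\) )?\((?P<year>\d{4})(( |\d{4})| (?P<type>[^)]+))?\)"
).fullmatch
find_runtime = re.compile(r"((?P<h>\d+) hr)? ?((?P<m>\d+) min)?").fullmatch

m = find_year("(2013)")  # plain year, no media type
assert m and m["year"] == "2013" and m["type"] is None

m = find_year("(I) (2016 TV Movie)")  # numbered title with a media type
assert m and m["year"] == "2016" and m["type"] == "TV Movie"

m = find_runtime("2 hr 22 min")  # converted to minutes by parse_page()
assert m and int(m["h"]) * 60 + int(m["m"]) == 142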

36
unwind/init.sql Normal file

@@ -0,0 +1,36 @@
-- Statements are separated by ";;" so that init_db() in db.py can split the
-- file and execute them one at a time.
PRAGMA foreign_keys = ON;;

CREATE TABLE IF NOT EXISTS users (
    id TEXT NOT NULL PRIMARY KEY,
    imdb_id TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL
);;

CREATE TABLE IF NOT EXISTS movies (
    id TEXT NOT NULL PRIMARY KEY,
    title TEXT NOT NULL,
    release_year NUMBER NOT NULL,
    media_type TEXT NOT NULL,
    imdb_id TEXT NOT NULL UNIQUE,
    score NUMBER NOT NULL,
    runtime NUMBER,
    genres TEXT NOT NULL,
    updated TEXT NOT NULL
);;

CREATE TABLE IF NOT EXISTS ratings (
    id TEXT NOT NULL PRIMARY KEY,
    movie_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    score NUMBER NOT NULL,
    rating_date TEXT NOT NULL,
    favorite NUMBER,
    finished NUMBER,
    FOREIGN KEY(movie_id) REFERENCES movies(id),
    FOREIGN KEY(user_id) REFERENCES users(id)
);;

CREATE UNIQUE INDEX IF NOT EXISTS ratings_index ON ratings (
    movie_id,
    user_id
);;

139
unwind/models.py Normal file

@@ -0,0 +1,139 @@
import json
from dataclasses import asdict, dataclass, field, fields, is_dataclass
from datetime import datetime, timezone
from typing import Any, ClassVar, Optional, Type, Union, get_args, get_origin

from .types import ULID


def is_optional(tp: Type):
    if get_origin(tp) is not Union:
        return False
    args = get_args(tp)
    return len(args) == 2 and type(None) in args


def optional_type(tp: Type):
    if get_origin(tp) is not Union:
        return None
    args = get_args(tp)
    if len(args) != 2 or args[1] is not type(None):
        return None
    return args[0]


def asplain(o) -> dict[str, Any]:
    validate(o)
    d = asdict(o)
    for f in fields(o):
        target = f.type
        # XXX this doesn't properly support any kind of nested types
        if (otype := optional_type(f.type)) is not None:
            target = otype
        if (otype := get_origin(target)) is not None:
            target = otype
        v = d[f.name]
        if target is ULID:
            d[f.name] = str(v)
        elif target in {datetime}:
            d[f.name] = v.isoformat()
        elif target in {set}:
            d[f.name] = json.dumps(list(sorted(v)))
        elif target in {list}:
            d[f.name] = json.dumps(list(v))
        elif target in {bool, str, int, float, None}:
            pass
        else:
            raise ValueError(f"Unsupported value type: {f.name}: {type(v)}")
    return d


def fromplain(cls, d: dict[str, Any]):
    # if not is_dataclass(cls):
    #     raise TypeError(f'Not a dataclass: {type(cls)}')
    dd = {}
    for f in fields(cls):
        target = f.type
        otype = optional_type(f.type)
        is_opt = otype is not None
        if is_opt:
            target = otype
        if (xtype := get_origin(target)) is not None:
            target = xtype
        v = d[f.name]
        if is_opt and v is None:
            dd[f.name] = v
        elif isinstance(v, target):
            dd[f.name] = v
        elif target in {set, list}:
            dd[f.name] = target(json.loads(v))
        elif target in {datetime}:
            dd[f.name] = target.fromisoformat(v)
        else:
            dd[f.name] = target(v)
    o = cls(**dd)
    validate(o)
    return o


def validate(o):
    for f in fields(o):
        vtype = type(getattr(o, f.name))
        if vtype is not f.type:
            if get_origin(f.type) is vtype or (
                get_origin(f.type) is Union and vtype in get_args(f.type)
            ):
                continue
            raise ValueError(f"Invalid value type: {f.name}: {vtype}")


def utcnow():
    return datetime.now(timezone.utc)


@dataclass
class Movie:
    _table: ClassVar[str] = "movies"

    id: ULID = field(default_factory=ULID)
    title: str = None  # canonical title
    release_year: int = None  # canonical release date
    media_type: Optional[str] = None
    imdb_id: str = None
    score: int = None  # range: [0,100]
    runtime: Optional[int] = None  # minutes
    genres: set[str] = None
    updated: datetime = field(default_factory=utcnow)


@dataclass
class Rating:
    _table: ClassVar[str] = "ratings"

    id: ULID = field(default_factory=ULID)
    movie_id: ULID = None
    user_id: ULID = None
    score: int = None  # range: [0,100]
    rating_date: datetime = None
    favorite: Optional[bool] = None
    finished: Optional[bool] = None


@dataclass
class User:
    _table: ClassVar[str] = "users"

    id: ULID = field(default_factory=ULID)
    imdb_id: str = None
    name: str = None  # canonical user name
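A short round-trip sketch of the (de)serialization helpers above; field values are illustrative, and importing the package loads data/config.toml as a side effect:

from unwind.models import Movie, asplain, fromplain

movie = Movie(
    title="Example",
    release_year=1999,
    imdb_id="tt0000000",
    score=87,
    genres={"Drama"},
)

plain = asplain(movie)  # ULID -> str, datetime -> ISO 8601, set -> JSON text
assert plain["genres"] == '["Drama"]'

same = fromplain(Movie, plain)  # and back again
assert same == movie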

185
unwind/request.py Normal file

@@ -0,0 +1,185 @@
import json
import logging
from collections import deque
from contextlib import contextmanager
from dataclasses import dataclass
from functools import wraps
from hashlib import md5
from random import random
from time import sleep, time
from typing import Callable, Optional

import bs4
import requests
from urllib3.util.retry import Retry

from . import config

log = logging.getLogger(__name__)

if config.debug and config.cachedir:
    config.cachedir.mkdir(exist_ok=True)


def set_retries(s: requests.Session, n: int, backoff_factor: float = 0.2):
    retry = (
        Retry(
            total=n,
            connect=n,
            read=n,
            status=n,
            status_forcelist=Retry.RETRY_AFTER_STATUS_CODES,
            backoff_factor=backoff_factor,
        )
        if n
        else Retry(0, read=False)
    )
    for a in s.adapters.values():
        a.max_retries = retry


_shared_session = None


@contextmanager
def session():
    global _shared_session
    if _shared_session:
        yield _shared_session
        return
    _shared_session = Session()
    try:
        yield _shared_session
    finally:
        _shared_session = None


def Session() -> requests.Session:
    s = requests.Session()
    s.headers["User-Agent"] = "Mozilla/5.0 Gecko/20100101 unwind/20210506"
    return s


def throttle(
    times: int, per_seconds: float, jitter: Optional[Callable[[], float]] = None
) -> Callable[[Callable], Callable]:
    calls: deque[float] = deque(maxlen=times)
    if jitter is None:
        jitter = lambda: 0.0

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def inner(*args, **kwds):
            # clean up
            while calls:
                if calls[0] + per_seconds > time():
                    break
                calls.popleft()
            # wait
            if len(calls) == calls.maxlen:
                wait_until = calls.popleft() + per_seconds + jitter()
                timeout = wait_until - time()
                log.debug(f"waiting {timeout:.2} seconds ... ⏳")
                sleep(timeout)
            # call
            calls.append(time())
            try:
                r = func(*args, **kwds)
            except Exception as e:
                if getattr(e, "is_cached", False):
                    calls.pop()
                raise
            if getattr(r, "is_cached", False):
                calls.pop()
            return r

        return inner

    return decorator


class CachedStr(str):
    is_cached = True


@dataclass
class CachedResponse:
    is_cached = True

    status_code: int
    text: str
    url: str
    headers: Optional[dict[str, str]] = None

    def json(self):
        return json.loads(self.text)


class RedirectError(RuntimeError):
    def __init__(self, from_url: str, to_url: str, is_cached=False):
        self.from_url = from_url
        self.to_url = to_url
        self.is_cached = is_cached
        super().__init__(f"Redirected: {from_url} -> {to_url}")


@throttle(1, 1, random)
def http_get(s: requests.Session, url: str, *args, **kwds) -> requests.Response:
    req = s.prepare_request(requests.Request("GET", url, *args, **kwds))
    if config.debug and config.cachedir:
        sig = repr(req.url)  # + repr(sorted(req.headers.items()))
        cachefile = config.cachedir / md5(sig.encode()).hexdigest()
    else:
        cachefile = None
    if cachefile:
        if cachefile.exists():
            log.debug(
                f"loading {req.url} ({req.headers!r}) from cache {cachefile} ... 💾"
            )
            with cachefile.open() as fp:
                resp = CachedResponse(**json.load(fp))
            if 300 <= resp.status_code <= 399:
                raise RedirectError(
                    from_url=resp.url, to_url=resp.headers["location"], is_cached=True
                )
            return resp
    log.debug(f"loading {req.url} ({req.headers!r}) ... ⚡️")
    resp = s.send(req, allow_redirects=False, stream=True)
    resp.raise_for_status()
    if cachefile:
        with cachefile.open("w") as fp:
            json.dump(
                {
                    "status_code": resp.status_code,
                    "text": resp.text,
                    "url": resp.url,
                    "headers": dict(resp.headers),
                },
                fp,
            )
    if resp.is_redirect:
        # Redirects could mean trouble, we need to stay on top of that!
        raise RedirectError(from_url=resp.url, to_url=resp.headers["location"])
    return resp


def soup_from_url(url):
    with session() as s:
        r = http_get(s, url)
        soup = bs4.BeautifulSoup(r.text, "html5lib")
    return soup
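A small standalone sketch of the throttle decorator; `fetch` is hypothetical, and importing unwind.request loads data/config.toml as a side effect:

from random import random
from time import time

from unwind.request import throttle


@throttle(2, 1.0, jitter=random)  # at most 2 calls per rolling second
def fetch(n: int) -> int:
    return n


start = time()
for n in range(4):
    fetch(n)
# The third call blocks until a slot frees up, so this takes at least a second.
print(f"4 calls took {time() - start:.2f}s")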

31
unwind/types.py Normal file

@@ -0,0 +1,31 @@
import re
from typing import Union, cast

import ulid
from ulid.hints import Buffer


class ULID(ulid.ULID):
    """Extended ULID type.

    Same as ulid.ULID, but allows initializing without a buffer, to make
    it easier to use the class as a standard factory.

    For more information about ULIDs, see https://github.com/ulid/spec.
    """

    _pattern = re.compile(r"^[0-9A-HJKMNP-TV-Z]{26}$")

    def __init__(self, buffer: Union[Buffer, ulid.ULID, str, None] = None):
        if isinstance(buffer, str):
            if not self._pattern.search(buffer):
                raise ValueError("Invalid ULID.")
            buffer = ulid.from_str(buffer)
            assert isinstance(buffer, ulid.ULID)
        if isinstance(buffer, ulid.ULID):
            buffer = cast(memoryview, buffer.memory)
        elif buffer is None:
            buffer = cast(memoryview, ulid.new().memory)
        super().__init__(buffer)
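Usage sketch for the extended ULID type (the ids are generated, not fixed values):

from unwind.types import ULID

new_id = ULID()           # fresh, time-ordered id, no buffer required
same = ULID(str(new_id))  # round-trips through its 26-char string form
assert new_id == same
assert len(str(new_id)) == 26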

53
unwind/web.py Normal file

@@ -0,0 +1,53 @@
from typing import Optional

from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route

from .db import close_connection_pool, find_ratings, open_connection_pool


def imdb_url(imdb_id: str):
    return f"https://www.imdb.com/title/{imdb_id}/"


def truthy(s: Optional[str]):
    return bool(s) and s.lower() in {"1", "yes", "true"}


async def ratings(request):
    title = request.query_params.get("title")
    media_type = request.query_params.get("media_type")
    ignore_tv_episodes = truthy(request.query_params.get("ignore_tv_episodes"))
    rows = await find_ratings(
        title=title, media_type=media_type, ignore_tv_episodes=ignore_tv_episodes
    )
    # Group rows by movie, collecting every user's score per movie.
    aggr = {}
    for r in rows:
        mov = aggr.setdefault(
            r["movie_imdb_id"],
            {
                "title": r["movie_title"],
                "year": r["release_year"],
                "link": imdb_url(r["movie_imdb_id"]),
                "user_scores": [],
                "imdb_score": r["imdb_score"],
                "media_type": r["media_type"],
            },
        )
        mov["user_scores"].append(r["user_score"])
    resp = tuple(aggr.values())
    return JSONResponse(resp)


app = Starlette(
    on_startup=[open_connection_pool],
    on_shutdown=[close_connection_pool],
    routes=[
        Route("/ratings", ratings),
    ],
)