unwind/scripts/load_imdb_dumps

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

19 lines
458 B
Text
Raw Normal View History

2021-06-21 18:54:03 +02:00
#!/bin/sh
# Abort on the first failing command (-e) and on any use of an unset
# variable (-u). The options are enabled with `set` instead of on the
# shebang line so that they also take effect when the script is started as
# `sh <script>`, in which case the shebang line is only a comment.
set -eu
2021-07-21 22:34:03 +02:00
# Directory for the IMDb data files, below $RUN_DIR. RUN_DIR is not assigned
# in this script, so it has to be supplied by the environment.
datadir="${RUN_DIR}/data/imdb"
2021-06-21 18:54:03 +02:00
# Trace every executed command when DEBUG is set to a non-empty value.
if [ -n "${DEBUG:-}" ]; then
  set -x
fi

# Background on the datasets and where they are published:
# - https://www.imdb.com/interfaces/
# - https://datasets.imdbws.com/
basics_url=https://datasets.imdbws.com/title.basics.tsv.gz
ratings_url=https://datasets.imdbws.com/title.ratings.tsv.gz

# Local paths of the two dumps inside $datadir.
basics_file="$datadir"/title.basics.tsv.gz
ratings_file="$datadir"/title.ratings.tsv.gz

# Download both dumps straight into $datadir in a single wget run.
# -N turns on timestamping, so a file is only fetched again when the remote
# copy is newer than the local one.
wget -N --no-directories --directory-prefix "$datadir" \
  "$basics_url" "$ratings_url"

# Hand the downloaded files to the application's importer.
"$RUN_BIN" app import-imdb-dataset \
  --basics "$basics_file" \
  --ratings "$ratings_file"