From 69643455a66adabfaa6cc3917233bfb9de21442f Mon Sep 17 00:00:00 2001 From: ducklet Date: Sat, 4 Feb 2023 14:12:36 +0100 Subject: [PATCH] IMDb import: fix progress reporting Because we calculate the percentage based on the number of lines, we need to yield once per line; otherwise the count is off. --- unwind/imdb_import.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/unwind/imdb_import.py b/unwind/imdb_import.py index 7e55b62..bccb18a 100644 --- a/unwind/imdb_import.py +++ b/unwind/imdb_import.py @@ -177,13 +177,14 @@ def read_ratings_as_mapping(path: Path): return {r[0]: (round(100 * (float(r[1]) - 1) / 9), int(r[2])) for r in rows} -def read_basics(path: Path): +def read_basics(path: Path) -> Generator[Movie | None, None, None]: mtime = gz_mtime(path) rows = read_imdb_tsv(path, BasicRow) for row in rows: if row.startYear is None: log.debug("Skipping movie, missing year: %s", row) + yield None continue m = row.as_movie() @@ -210,6 +211,9 @@ async def import_from_file(*, basics_path: Path, ratings_path: Path): log.info("⏳ Imported %s%%", round(perc, 1)) perc_next_report += perc_step + if m is None: + continue + if m.media_type not in { "Movie", "Short", @@ -234,6 +238,7 @@ async def import_from_file(*, basics_path: Path, ratings_path: Path): await add_or_update_many_movies(chunk) chunk = [] + log.info("👍 Imported 100%") await db.set_import_progress(100)