From e9a58ed40e8e49d514172ac98fd6f4d534e0af3a Mon Sep 17 00:00:00 2001
From: ducklet
Date: Sat, 11 May 2024 17:38:16 +0200
Subject: [PATCH] fix: movie updates were not imported because of a broken timestamp

IMDb changed their exports. The timestamp of the file in their gzipped export is now always 0, i.e. 1970-01-01T00:00:00Z.
---
 unwind/imdb_import.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/unwind/imdb_import.py b/unwind/imdb_import.py
index 2eeb33c..5464df0 100644
--- a/unwind/imdb_import.py
+++ b/unwind/imdb_import.py
@@ -101,12 +101,10 @@ title_types = {
 }
 
 
-def gz_mtime(path: Path) -> datetime:
-    """Return the timestamp of the compressed file."""
-    g = gzip.GzipFile(path, "rb")
-    g.peek(1)  # start reading the file to fill the timestamp field
-    assert g.mtime is not None
-    return datetime.fromtimestamp(g.mtime).replace(tzinfo=timezone.utc)
+def _mtime(path: Path) -> datetime:
+    """Return the timestamp of the file."""
+    mtime = path.stat().st_mtime
+    return datetime.fromtimestamp(mtime, tz=timezone.utc)
 
 
 def count_lines(path: Path) -> int:
@@ -160,7 +158,7 @@ def read_imdb_tsv(path: Path, row_type, *, unpack=True):
 
 
 def read_ratings(path: Path):
-    mtime = gz_mtime(path)
+    mtime = _mtime(path)
     rows = read_imdb_tsv(path, RatingRow)
 
     for row in rows:
@@ -176,7 +174,7 @@ def read_ratings_as_mapping(path: Path):
 
 
 def read_basics(path: Path) -> Generator[Movie | None, None, None]:
-    mtime = gz_mtime(path)
+    mtime = _mtime(path)
     rows = read_imdb_tsv(path, BasicRow)
 
     for row in rows: