From e9a58ed40e8e49d514172ac98fd6f4d534e0af3a Mon Sep 17 00:00:00 2001
From: ducklet <ducklet@noreply.code.dumpr.org>
Date: Sat, 11 May 2024 17:38:16 +0200
Subject: [PATCH] fix: movie updates were not imported because of a broken
 timestamp

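IMDb changed their exports. The mtime field stored in the gzip header of
their export files is now always 0, i.e. 1970-01-01T00:00:00Z, so it can
no longer be used to tell when the data was last updated. Use the
filesystem modification time of the downloaded file instead.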
---
 unwind/imdb_import.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/unwind/imdb_import.py b/unwind/imdb_import.py
index 2eeb33c..5464df0 100644
--- a/unwind/imdb_import.py
+++ b/unwind/imdb_import.py
@@ -101,12 +101,10 @@ title_types = {
 }
 
 
-def gz_mtime(path: Path) -> datetime:
-    """Return the timestamp of the compressed file."""
-    g = gzip.GzipFile(path, "rb")
-    g.peek(1)  # start reading the file to fill the timestamp field
-    assert g.mtime is not None
-    return datetime.fromtimestamp(g.mtime).replace(tzinfo=timezone.utc)
+def _mtime(path: Path) -> datetime:
+    """Return the file's modification time (from the filesystem) in UTC."""
+    mtime = path.stat().st_mtime
+    return datetime.fromtimestamp(mtime, tz=timezone.utc)
 
 
 def count_lines(path: Path) -> int:
@@ -160,7 +158,7 @@ def read_imdb_tsv(path: Path, row_type, *, unpack=True):
 
 
 def read_ratings(path: Path):
-    mtime = gz_mtime(path)
+    mtime = _mtime(path)
     rows = read_imdb_tsv(path, RatingRow)
 
     for row in rows:
@@ -176,7 +174,7 @@ def read_ratings_as_mapping(path: Path):
 
 
 def read_basics(path: Path) -> Generator[Movie | None, None, None]:
-    mtime = gz_mtime(path)
+    mtime = _mtime(path)
     rows = read_imdb_tsv(path, BasicRow)
 
     for row in rows: