From 9f6baa99b0a6b0a5ec42b46ab0142c68b187820a Mon Sep 17 00:00:00 2001
From: ducklet
Date: Sun, 4 Jul 2021 18:47:26 +0200
Subject: [PATCH] optimize runtime

A `RatingRow`'s `id` and `updated` are never used, but creating them a
million+ times is quite expensive, so initializing them with `None`
saves a lot of time.
`dataclasses`' `fields` function is also quite expensive; loading the
fields from a row directly saves a lot of CPU cycles.
---
 unwind/imdb_import.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/unwind/imdb_import.py b/unwind/imdb_import.py
index cf065ac..56d91fb 100644
--- a/unwind/imdb_import.py
+++ b/unwind/imdb_import.py
@@ -83,7 +83,7 @@ class RatingRow:
 
     @classmethod
     def from_row(cls, row):
-        inst = cls(*(f.type(r) for f, r in zip(fields(cls), row)))
+        inst = cls(tconst=row[0], averageRating=float(row[1]), numVotes=int(row[2]))
         assert inst.tconst != r"\N"
         return inst
 
@@ -91,6 +91,8 @@ class RatingRow:
         return Movie(
             imdb_id=self.tconst,
             score=score_from_imdb_rating(self.averageRating),
+            updated=None,  # optimization: skip default factory
+            id=None,  # optimization: skip default factory
         )
 
 