optimize runtime

The `id` and `updated` of the `Movie` built from a `RatingRow` are never
used, but running their default factories a million+ times is quite
expensive, so initializing them with `None` saves a lot of time.
`dataclasses`' `fields` function is also quite expensive; loading the
fields from a row directly saves a lot of CPU cycles.
This commit is contained in:
ducklet 2021-07-04 18:47:26 +02:00
parent 1038b4eaff
commit 9f6baa99b0

View file

@ -83,7 +83,7 @@ class RatingRow:
@classmethod @classmethod
def from_row(cls, row): def from_row(cls, row):
inst = cls(*(f.type(r) for f, r in zip(fields(cls), row))) inst = cls(tconst=row[0], averageRating=float(row[1]), numVotes=int(row[2]))
assert inst.tconst != r"\N" assert inst.tconst != r"\N"
return inst return inst
@ -91,6 +91,8 @@ class RatingRow:
return Movie( return Movie(
imdb_id=self.tconst, imdb_id=self.tconst,
score=score_from_imdb_rating(self.averageRating), score=score_from_imdb_rating(self.averageRating),
updated=None, # optimization: skip default factory
id=None, # optimization: skip default factory
) )