fix file matching for files with non-utf8 encodings

We make sure all filenames are valid UTF-8 before committing them to
the database.  For filenames that use a different encoding, we replace
the offending characters with `?` (using `str.encode(errors="replace")`).
When matching files from the local filesystem against files from the
database, we therefore have to compare the mangled filenames, not the
original ones.
This commit is contained in:
ducklet 2023-02-21 18:28:13 +01:00
parent d8afd3a293
commit d995651487

View file

@@ -202,7 +202,7 @@ def _scan_remove_missing(
if f.is_dir(follow_symlinks=False): if f.is_dir(follow_symlinks=False):
append(Path(f.path)) append(Path(f.path))
expected.discard(f.name) expected.discard(Path(d["location"]).name)
# `subdirs` sorts all changed dirs to the right, which means when we # `subdirs` sorts all changed dirs to the right, which means when we
# extend `dirs` using `extendleft` it'll put them all left-most. # extend `dirs` using `extendleft` it'll put them all left-most.
@@ -561,7 +561,7 @@ def _ingest_ls_remove_missing(
elif action == "changed": elif action == "changed":
context.changed += 1 context.changed += 1
expected.discard(f.path.name) expected.discard(Path(d["location"]).name)
db.recalculate_dir_sizes(conn) db.recalculate_dir_sizes(conn)