From d99565148752e0ff9cec3de1f6d7a0f1e2add2cb Mon Sep 17 00:00:00 2001 From: ducklet Date: Tue, 21 Feb 2023 18:28:13 +0100 Subject: [PATCH] fix file matching for files with non-utf8 encodings We make sure all filenames are using UTF-8 encoding before committing them to the database. For filenames using a different encoding we convert offending chars to `?` (using `str.encode(errors="replace")`). When trying to match files from the local filesystem with files from the database we thus have to use the mangled filenames, not the original ones. --- metadex/metadex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadex/metadex.py b/metadex/metadex.py index 8f1783c..3ef5b81 100644 --- a/metadex/metadex.py +++ b/metadex/metadex.py @@ -202,7 +202,7 @@ def _scan_remove_missing( if f.is_dir(follow_symlinks=False): append(Path(f.path)) - expected.discard(f.name) + expected.discard(Path(d["location"]).name) # `subdirs` sorts all changed dirs to the right, which means when we # extend `dirs` using `extendleft` it'll put them all left-most. @@ -561,7 +561,7 @@ def _ingest_ls_remove_missing( elif action == "changed": context.changed += 1 - expected.discard(f.path.name) + expected.discard(Path(d["location"]).name) db.recalculate_dir_sizes(conn)