add rclone importer
This commit is contained in:
parent
af1236bc7e
commit
954562881a
4 changed files with 111 additions and 6 deletions
|
|
@ -63,7 +63,7 @@ def getargs():
|
|||
|
||||
# Command: scan
|
||||
|
||||
with command_parser("scan", help="scan a local file system") as subparser:
|
||||
with command_parser("scan", help="Import from a local file system.") as subparser:
|
||||
|
||||
subparser.add_argument(
|
||||
"basedir",
|
||||
|
|
@ -89,7 +89,7 @@ def getargs():
|
|||
|
||||
with command_parser(
|
||||
"ingest-ls",
|
||||
help="ingest extra data",
|
||||
help="Import from `ls -lR`.",
|
||||
description="When ingesting data from an external source, the hostname will not be set automatically.",
|
||||
) as subparser:
|
||||
|
||||
|
|
@ -113,7 +113,9 @@ def getargs():
|
|||
|
||||
# Command: ingest-db
|
||||
|
||||
with command_parser("ingest-db") as subparser:
|
||||
with command_parser(
|
||||
"ingest-db", help="Import from a metadex.sqlite file."
|
||||
) as subparser:
|
||||
|
||||
subparser.add_argument(
|
||||
"infile",
|
||||
|
|
@ -128,9 +130,33 @@ def getargs():
|
|||
help="map a source host:path to any other destination while importing",
|
||||
)
|
||||
|
||||
# Command: ingest-rclone-json
with command_parser(
    "ingest-rclone-json", help="Import from `rclone lsjson`."
) as subparser:
    # The listing is read from the given file, or from stdin when omitted.
    subparser.add_argument(
        "infile",
        nargs="?",
        type=argparse.FileType(),
        default=sys.stdin,
        help="output from `rclone lsjson`",
    )
    subparser.add_argument(
        "--remote-base",
        # nargs=1 yields a one-element list; it is unwrapped after parsing.
        nargs=1,
        required=True,
        type=str,
        help="base path that is prepended to every path listed in the infile",
    )
    subparser.add_argument(
        "--remove-missing",
        action="store_true",
        help="Remove files not listed in the infile.",
    )
|
||||
|
||||
# Command: rm
|
||||
|
||||
with command_parser("rm") as subparser:
|
||||
with command_parser("rm", help="Remove files from the index.") as subparser:
|
||||
subparser.add_argument(
|
||||
"files",
|
||||
type=str,
|
||||
|
|
@ -146,7 +172,7 @@ def getargs():
|
|||
|
||||
# Command: ls
|
||||
|
||||
with command_parser("ls") as subparser:
|
||||
with command_parser("ls", help="Search indexed files.") as subparser:
|
||||
subparser.add_argument(
|
||||
"file",
|
||||
type=str,
|
||||
|
|
@ -175,6 +201,9 @@ def getargs():
|
|||
args.infile = utils.abspath(args.infile)
|
||||
elif args.mode == "ingest-ls":
|
||||
config.hostname = None
|
||||
elif args.mode == "ingest-rclone-json":
|
||||
config.hostname = None
|
||||
args.remote_base = args.remote_base[0]
|
||||
elif args.mode is None:
|
||||
parser.print_help()
|
||||
parser.exit(1, "Error: No command selected.")
|
||||
|
|
@ -182,6 +211,24 @@ def getargs():
|
|||
return args
|
||||
|
||||
|
||||
@command("ingest-rclone-json")
def cmd_ingest_rclone_json(args):
    """Ingest an `rclone lsjson` listing and print a one-line summary.

    Reads ``args.db``, ``args.infile``, ``args.ignore_from``,
    ``args.remote_base`` and ``args.remove_missing`` from the parsed
    command-line arguments.
    """
    metadex.init(args.db)
    try:
        log.info("Ingesting rclone JSON file %a ...", args.infile.name)
        context = metadex.ingest_rclone_json(
            args.infile,
            ignore_file=args.ignore_from,
            remote_base=args.remote_base,
            remove_missing=args.remove_missing,
        )
    finally:
        # Pair close() with init() even when the ingest raises.
        metadex.close()

    msg = f"Checked {context.seen} files, {context.added} new, {context.changed} changed, {context.ignored} ignored, {context.removed} removed"
    # Pad to the terminal width (presumably to overwrite a progress line;
    # verify against _log_context).
    print(msg.ljust(metadex._terminal_width))
|
||||
|
||||
|
||||
@command("ingest-ls")
|
||||
def cmd_ingest_ls(args):
|
||||
metadex.init(args.db)
|
||||
|
|
|
|||
|
|
@ -373,6 +373,7 @@ def upsert_if_changed(conn: Connection, new_data: dict):
|
|||
return "unchanged"
|
||||
|
||||
log.info("File changed: %a:%a", new_data["hostname"], new_data["location"])
|
||||
log.debug("New data: %a Previous: %a", new_data, row._mapping)
|
||||
|
||||
# changelog = []
|
||||
# for f in ("stat_bytes", "stat_modified", "stat_type"):
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
|
@ -5,6 +6,7 @@ import sys
|
|||
import time
|
||||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from shutil import get_terminal_size
|
||||
from typing import Iterable, Literal, TextIO
|
||||
|
|
@ -356,6 +358,62 @@ def ingest_db_file(
|
|||
return context
|
||||
|
||||
|
||||
def _naive_fromisoformat(string, /):
    """Parse an ISO 8601 / RFC 3339 timestamp into a naive UTC datetime.

    The input is normalised first so that it also parses on Python
    versions older than 3.11, whose ``datetime.fromisoformat`` is strict:

    * a trailing ``Z`` is rewritten to ``+00:00``;
    * the fractional-seconds part is truncated or zero-padded to exactly
      six digits (sub-microsecond precision is dropped).

    The parsed value is converted to UTC and returned without tzinfo.
    A timestamp without any UTC offset is interpreted as local time by
    ``datetime.astimezone``.
    """
    if string.endswith("Z"):
        string = string[:-1] + "+00:00"

    # Only the seconds field can carry a fraction, so the first ".digits"
    # run is the one to normalise.
    string = re.sub(
        r"\.(\d+)",
        lambda match: "." + match.group(1)[:6].ljust(6, "0"),
        string,
        count=1,
    )

    parsed = datetime.fromisoformat(string)
    return parsed.astimezone(tz=timezone.utc).replace(tzinfo=None)
|
||||
|
||||
|
||||
def _parse_rclone_json(file: TextIO, *, remote_base: str) -> Iterable[dict]:
    """Yield one metadata dict per entry of an `rclone lsjson` listing.

    The whole JSON document is loaded from *file* when iteration starts.
    Each yielded dict has the keys ``location``, ``hostname``,
    ``stat_bytes``, ``stat_modified`` and ``stat_type``.

    ``location`` is the entry's ``Path`` joined below ``/<remote_base>``.
    ``stat_bytes`` is 0 when the listing reports a size of -1, and
    ``stat_type`` is ``"d"`` for directories and ``"f"`` otherwise.
    Entries whose ``Path`` is ``".."`` are skipped.
    """
    remote_path = Path("/") / remote_base
    for item in json.load(file):
        # {"Path":"/foo/bar","Name":"bar","Size":-1,"MimeType":"inode/directory","ModTime":"2022-08-11T22:44:35+02:00","IsDir":true},
        if item["Path"] == "..":
            continue

        # Joining an absolute segment would replace remote_path entirely,
        # so a leading "/" is stripped to keep the entry below the base.
        relative = item["Path"].lstrip("/")
        size = item["Size"]

        yield dict(
            location=str(remote_path / relative),
            hostname=config.hostname,
            stat_bytes=0 if size == -1 else size,
            stat_modified=_naive_fromisoformat(item["ModTime"]),
            stat_type="d" if item["IsDir"] else "f",
        )
|
||||
|
||||
|
||||
def ingest_rclone_json(
    file: TextIO,
    *,
    ignore_file: Path,
    remote_base: str,
    remove_missing: bool = False,
) -> _LogContext:
    """Import the entries of an `rclone lsjson` listing into the database.

    Every entry parsed from *file* is counted as seen. Entries whose
    location matches the rules parsed from *ignore_file* are counted as
    ignored and skipped. All others are upserted within a single
    transaction and counted as added or changed according to the result.

    NOTE(review): *remove_missing* is accepted but not used in this
    function, so the returned context's ``removed`` counter is never
    updated here.

    Returns the context holding the counters.
    """
    is_ignored = ignore.parse(ignore_file)
    context = _LogContext()

    with db.transaction() as conn:
        entries = _parse_rclone_json(file, remote_base=remote_base)
        for entry in entries:
            context.seen += 1
            location = entry["location"]
            _log_context(location, context)

            if is_ignored(location):
                log.debug(
                    "Skipping ignored entry: %a:%a", entry["hostname"], location
                )
                context.ignored += 1
                continue

            action = db.upsert_if_changed(conn, entry)
            if action == "added":
                context.added += 1
            elif action == "changed":
                context.changed += 1

    return context
|
||||
|
||||
|
||||
def ingest_ls(
|
||||
file: TextIO,
|
||||
*,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
_size_quantifiers = "BKMGTP"
|
||||
_size_map: "dict[str, int]" = {
|
||||
_size_quantifiers[i]: 2 ** (10 * i) for i in range(len(_size_quantifiers))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue