Commit 4f53b927 authored by Marcel Hellkamp's avatar Marcel Hellkamp
Browse files

First draft for a cdstar client library and command-line utility framework

parent 36172cd9
Copyright 2019 Marcel Hellkamp
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
include *.txt
include *.md
#: Version string of the package.
__version__ = "3.0.dev0"
if __name__ == '__main__':
    # Executed as a script: run the command-line interface and hand its
    # result to the interpreter as exit status (a falsy result becomes 0).
    import sys

    from .cli import main

    exit_status = main() or 0
    sys.exit(exit_status)
import os
import requests
from requests_toolbelt import MultipartEncoder
__all__ = ("CDStar", "PostUpdate")
class CDStar:
    """ Provide low-level methods for corresponding server-side REST endpoints.

        If not documented otherwise, each method call triggers exactly one REST
        request. There is no internal caching. The only state that is tracked by
        this class is the running transaction, if any.
    """

    def __init__(self, url, auth=None, _session=None):
        """ Create a new client.

            :param url: Base URL of the service. Normalized to end in exactly one '/'.
            :param auth: If set, passed as `auth` option with every request.
            :param _session: Session object used to send requests. A new
                `requests.Session` is created if not set.
        """
        self.url = url.rstrip("/") + '/'
        self.auth = auth
        self._session = _session or requests.Session()
        self._tx = None

    def clone(self):
        """ Return an independent instance with the same settings.

            The session object is shared with the clone. Other state (e.g. a
            running transaction) is not copied.
        """
        return CDStar(self.url, auth=self.auth, _session=self._session)

    def _rest(self, method, *path, _expect_status=None, **options):
        """ Send a single request and return the response.

            :param method: HTTP method name.
            :param path: Path segments, joined with '/' and appended to the base URL.
            :param _expect_status: Optional collection of status codes that are
                returned to the caller instead of being treated as an error.
            :param options: Additional options passed on to the session request.
            :return: The response object.
            :raises Exception: If the response is not ok and its status is not
                listed in `_expect_status`. The message is the response text.
        """
        if self.auth:
            options['auth'] = self.auth
        if self._tx:
            # Work on a copy so a headers dict supplied by the caller is never modified.
            headers = dict(options.get("headers") or {})
            headers["X-Transaction"] = self._tx['id']
            options["headers"] = headers
        rs = self._session.request(
            method, self.url + '/'.join(path), **options)
        if rs.ok or (_expect_status and rs.status_code in _expect_status):
            return rs
        # TODO: handle errors
        raise Exception(rs.text)

    def begin(self, autocommit=False):
        """ Begin a new transaction, return self.

            A transaction that is still running is rolled back first.

            :param autocommit: If true, the transaction is committed when this
                instance is used as a context manager and the block ends
                without an exception.
        """
        if self._tx:
            self.rollback()
        self._tx = self._rest("POST", "_tx").json()
        self._tx['x-autocommit'] = autocommit
        return self

    def commit(self):
        """ Commit the current transaction.

            :raises RuntimeError: If no transaction is running.
        """
        if not self._tx:
            raise RuntimeError("No transaction running")
        self._rest("POST", "_tx", self._tx['id'])
        self._tx = None

    def rollback(self):
        """ Rollback the current transaction.

            Do nothing if no transaction is active. The local transaction state
            is cleared even if the request fails, so a broken transaction does
            not block every following call to begin() or rollback().
        """
        if self._tx:
            try:
                self._rest("DELETE", "_tx", self._tx['id'])
            finally:
                self._tx = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Finish a running transaction: commit if the block ended without an
            exception and autocommit was requested, rollback otherwise. """
        if self._tx:
            if exc_type is None and self._tx['x-autocommit']:
                self.commit()
            else:
                self.rollback()

    def service_info(self):
        """ GET the service root and return the decoded JSON response. """
        return self._rest("GET").json()

    def vault_info(self, vault: str):
        """ GET the given vault and return the decoded JSON response. """
        return self._rest("GET", vault).json()

    def create_archive(self, vault, upload: "PostUpdate" = None):
        """ POST to a vault and return the decoded JSON response.

            :param vault: Vault name.
            :param upload: Optional form builder. If set, its multipart form is
                sent as request body.
        """
        if upload is not None:
            return self._rest("POST", vault, data=upload.form,
                              headers={'Content-Type': upload.form.content_type}).json()
        return self._rest("POST", vault).json()

    def update_archive(self, vault, archive, upload: "PostUpdate"):
        """ POST a multipart form to an existing archive and return the decoded
            JSON response.

            :param vault: Vault name.
            :param archive: Archive ID.
            :param upload: Form builder whose multipart form is sent as request body.
        """
        return self._rest("POST", vault, archive, data=upload.form,
                          headers={'Content-Type': upload.form.content_type}).json()

    def put_file(self, vault, id, target, source, type=None):
        """ PUT a single file into an archive.

            :param vault: Vault name.
            :param id: Archive ID.
            :param target: File name within the archive. Leading slashes are
                stripped before it is appended to the request path.
            :param source: Request body, passed on as `data` option.
            :param type: Content type of the upload
                (default: "application/x-autodetect").
        """
        self._rest("PUT", vault, id, target.lstrip("/"),
                   data=source,
                   headers={'Content-Type': type or "application/x-autodetect"})
class PostUpdate:
    """ Builder for CDSTAR POST multipart/form-data requests to upload multiple
        files or change aspects of an archive.

        All builder methods append to `fields` and return self, so calls can be
        chained. The encoded form is created on first access of `form` and
        reused afterwards; fields added later are not part of it.
    """

    def __init__(self):
        self.fields = []
        self._form = None

    @property
    def form(self):
        """ The multipart encoder for all fields collected so far (created once). """
        if self._form is None:
            self._form = MultipartEncoder(self.fields)
        return self._form

    def upload(self, target, src, type="application/x-autodetect"):
        """ Upload a file (string path or opened file-like object)

            A string path is opened in binary mode here and is not closed by
            this class.

            :param target: Target within the archive (must start with '/')
            :param src: String path to an existing file, or opened file-like object.
            :param type: Mime-type of the upload.
            :return: self
            :raises ValueError: If the target does not start with '/' or the
                source is of an unsupported type.
        """
        if not target.startswith("/"):
            raise ValueError("Upload target filename MUST start with '/'")

        if isinstance(src, str):
            self.fields.append((target, (os.path.basename(src), open(src, "rb"), type)))
        elif hasattr(src, 'fileno') or hasattr(src, 'getvalue'):
            # A file object is not a path. Use its name if it has a usable one
            # and fall back to the target name otherwise (e.g. for byte buffers).
            name = getattr(src, 'name', None)
            if not isinstance(name, str):
                name = target
            self.fields.append((target, (os.path.basename(name), src, type)))
        else:
            raise ValueError("Source must be a file path (str), byte buffer or opened file")
        return self

    def acl(self, subject, *permissions):
        """ Set permissions for a subject. Existing permissions for the same subject are replaced.

            :param subject: A subject-name, @groupname or a special subject such as `$any` or `$user`
            :param permissions: Permission names. Sent as a single comma separated value.
            :return: self
        """
        sub = "acl:{}".format(subject)
        self.fields.append((sub, ','.join(permissions)))
        return self

    def meta(self, field, *values, target=None):
        """ Set metadata for the archive, or a file within the archive.

            :param field: Meta-attribute field name. Should start with a schema prefix (e.g. `dc:` for DublinCore)
            :param values: Values for this meta attribute. If no values are
                given, a single empty value is added.
            :param target: File name to attach the metadata to. If not set, it is assigned to the entire archive.
            :return: self
        """
        attr = "meta:" + field
        if target:
            attr += ":" + target
        if values:
            self.fields.extend((attr, val) for val in values)
        else:
            self.fields.append((attr, ""))
        return self
import argparse
import configparser
import importlib
import os
import re
import sys
from ..cdstar import CDStar
__ALL__ = ["main", "register_subcommand", "printer"]
#: File names that are recognized as config files.
CONFIG_NAMES = ["cdstar.conf"]

#: Commands to load automatically. The module pycdstar.cli.NAME must have register() defined.
BUILDIN_COMMANDS = {"init", "put"}

#: Maps a subcommand name to a callable
__subcommands = {}

#: Subcommands that only take a single argument and no cdstar instance
__subcommands_noconfig = set()

# Root parser with the options shared by all subcommands.
parser = argparse.ArgumentParser(description='CDSTAR command-line client')
parser.add_argument(
    "-C",
    default=".",
    help="Change the current working directory before executing the command",
)
parser.add_argument(
    "-c", "--config",
    help="Path to config file (default: find automatically)",
)

# -v and -q contradict each other, so only one of them is accepted.
_grp = parser.add_mutually_exclusive_group()
_grp.add_argument(
    "-v", "--verbose",
    action="count",
    default=0,
    help="Print more info. Repeat to increase verbosity",
)
_grp.add_argument(
    "-q", "--quiet",
    action="store_true",
    help="Be quiet. Only print errors.",
)

# Subcommand parsers are attached here by register_subcommand().
subparsers = parser.add_subparsers(help='sub-command help')
def register_subcommand(name, command, parser_builder=None, aliases=None, help=None, description=None, noconfig=False):
    """ Register a sub-command and return an argparse parser for command-specific parameters.

        :param name: Name of the subcommand. Must not be registered already.
        :param command: Callable to store for this subcommand.
        :param parser_builder: Optional callable that receives the new
            subparser, e.g. to add arguments.
        :param aliases: Optional alternative names, passed on to argparse.
        :param help: Help text, passed on to argparse.
        :param description: Description, passed on to argparse.
        :param noconfig: If true, the name is also added to the set of
            subcommands that run without config.
        :return: The new subparser.
        :raises ImportError: If a subcommand with this name was registered before.
    """
    already_known = name in __subcommands
    if already_known:
        raise ImportError("Subcommand {} registered twice".format(name))

    __subcommands[name] = command
    if noconfig:
        __subcommands_noconfig.add(name)

    alias_names = aliases if aliases else []
    command_parser = subparsers.add_parser(
        name, aliases=alias_names, help=help, description=description)
    if parser_builder:
        parser_builder(command_parser)

    # Lets the dispatcher find out which subcommand was selected.
    command_parser.set_defaults(subcommand=name)
    return command_parser
class Printer:
    """ Helper class to print to stderr based on verbosity levels.

        Messages are templates for `str.format` and are formatted with the
        positional arguments. Keyword arguments are passed on to `print`.
    """

    __slots__ = ("verbosity", "quiet", "file")

    def __init__(self, level=0, file=sys.stderr):
        """
        :param level: Verbosity level. Negative values mean quiet.
        :param file: File-like object to print to.
        """
        self.verbosity = level
        self.quiet = level <= -1
        self.file = file

    def set_verbosity(self, level=0, file=None):
        """ Change the verbosity level.

            :param level: New verbosity level. Negative values mean quiet.
            :param file: New output file. The current one is kept if not set.
        """
        self.verbosity = level
        self.quiet = level <= -1
        if file is not None:
            self.file = file

    def _print(self, msg, args, kwargs):
        """ Format and print a message to the configured file (unless the
            caller supplied its own `file`). """
        kwargs.setdefault("file", self.file)
        print(msg.format(*args), **kwargs)

    def __call__(self, msg, *args, **kwargs):
        """ Print only if -q (--quiet) was NOT passed as a command-line parameter """
        if self.verbosity >= 0:
            self._print(msg, args, kwargs)

    def v(self, msg, *args, **kwargs):
        """ Print only if -v was passed as a command-line parameter """
        if self.verbosity >= 1:
            self._print(msg, args, kwargs)

    def vv(self, msg, *args, **kwargs):
        """ Print only if -vv was passed as a command-line parameter """
        if self.verbosity >= 2:
            self._print(msg, args, kwargs)

    def vvv(self, msg, *args, **kwargs):
        """ Print only if -vvv was passed as a command-line parameter """
        if self.verbosity >= 3:
            self._print(msg, args, kwargs)

    def error(self, msg, *args, **kwargs):
        """ Print an error message (if not quiet) and optionally (-vv or higher) a stacktrace."""
        # Format exactly once; formatting the result again would break on
        # argument values that contain braces.
        if self.verbosity >= 0:
            self._print(msg, args, kwargs)
        if self.verbosity >= 2:
            import traceback
            traceback.print_exc(file=self.file)

    def fatal(self, msg, *args, **kwargs):
        """ Print an error message (even if quiet) and optionally (-vv or higher) a stacktrace."""
        self._print(msg, args, kwargs)
        if self.verbosity >= 2:
            import traceback
            traceback.print_exc(file=self.file)
        else:
            # The hint itself is suppressed in quiet mode.
            self("Stacktrace not shown. Add -vv to print a full stacktrace.")
#: Shared module-level Printer instance, created with verbosity level 0.
#: This should be used to print optional messages to the user.
#: Messages are printed to stderr, so only use it for complementary information, not for the primary results.
printer = Printer(level=0, file=sys.stderr)
def main(args=None):
    """ Command-line entry point: parse arguments and run the selected subcommand.

        Subcommands registered as `noconfig` are called with the parsed options
        only. All other subcommands need a config file. Its DEFAULT section
        must define `server`, `vault` and `auth` (in the form
        `basic:<user>:<password>`), and the subcommand is called with a client
        instance, the vault name and the parsed options.

        :param args: List of command line arguments (default: sys.argv[1:]).
        :return: Exit status for the process.
    """
    # Load and register all built-in commands. Commands that are registered
    # already are skipped so main() can be called more than once per process.
    for name in sorted(BUILDIN_COMMANDS):
        if name not in __subcommands:
            importlib.import_module("." + name, __name__).register()

    # Parse command line arguments (may fail)
    opts = parser.parse_args(args)

    # Set verbosity of the shared printer
    if opts.quiet:
        printer.set_verbosity(-1)
    else:
        printer.set_verbosity(opts.verbose)

    if opts.C != ".":
        printer.v("Changing working directory to: {}", opts.C)
        os.chdir(opts.C)

    if not hasattr(opts, "subcommand"):
        parser.print_help()
        return 1

    cmd = opts.subcommand
    cmdmain = __subcommands[cmd]

    try:
        if cmd in __subcommands_noconfig:
            return cmdmain(opts) or 0

        config = load_config(opts.config) if opts.config else find_config(os.getcwd())
        server = config["DEFAULT"]["server"]
        vault = config["DEFAULT"]["vault"]
        auth = config["DEFAULT"]["auth"]
        if auth.startswith("basic:"):
            # "basic:user:password" -> ("user", "password")
            auth = tuple(auth[6:].split(':', 1))
        else:
            raise ValueError("Unknown authentication setting format (not printed, see config file)")
        client = CDStar(server, auth=auth)
        return cmdmain(client, vault, opts) or 0
    except KeyboardInterrupt:
        printer("Exiting...")
        return 0
    except CliError as e:
        printer.fatal(*e.args)
        return e.return_code
    except Exception:
        printer.fatal("Uncaught exception. Exiting...")
        raise
def load_config(fname):
    """ Load a config file and return the parsed configuration.

        :param fname: Path to the config file.
        :return: A `configparser.ConfigParser` with the content of the file.
        :raises OSError: If the file does not exist or cannot be opened.
    """
    printer.vv("Loading config from: {}", fname)
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open, which would
    # hide a mistyped --config path. Open the file explicitly instead.
    with open(fname) as fp:
        config.read_file(fp)
    return config
def find_config(start_dir):
    """ Search for a config file in a directory and all of its parents.

        Starting at `start_dir`, each directory is checked for every name in
        CONFIG_NAMES. The first existing file is loaded and returned. The
        search stops at the filesystem root.

        :param start_dir: Directory to start the search in.
        :return: The loaded configuration.
        :raises CliError: If no config file was found.
    """
    folder = os.path.abspath(start_dir)
    while os.path.isdir(folder):
        printer.vvv("Searching for config file in: {}", folder)
        candidates = (os.path.join(folder, name) for name in CONFIG_NAMES)
        for candidate in candidates:
            if os.path.exists(candidate):
                return load_config(candidate)

        # The root directory is its own parent: nothing left to search.
        above = os.path.abspath(os.path.join(folder, os.pardir))
        if above == folder:
            break
        folder = above

    raise CliError("Could not find '{}' in '{}' or any parent directory.", CONFIG_NAMES[0], start_dir)
def hbytes(n):
    """ Format a number of bytes as a human readable string.

        The value is divided by 1024 until its absolute value is below 1024 and
        then printed with one decimal place and the matching binary unit.
        Values too large for the biggest unit are reported as "alot".
    """
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    value = n
    index = 0
    while index < len(units):
        if abs(value) < 1024.0:
            return "{:.1f} {}".format(value, units[index])
        value = value / 1024.0
        index += 1
    return "alot"
def compile_glob(pattern):
    """ Translate a glob pattern into a compiled regular expression.

        `*` matches one or more characters except '/', `**` matches one or more
        characters of any kind, and `?` matches a single character except '/'.
        Everything else is matched literally. Patterns that start with '/' are
        anchored at the start; all other patterns may match after any prefix.
        The end of the pattern is always anchored.
    """
    wildcards = {
        '*': r'[^/]+',
        '**': r'.+',
        '?': r'[^/]',
    }
    # The capturing group keeps the wildcards in the result: literal text
    # ends up at even positions, wildcards at odd positions.
    tokens = re.split(r'(\*\*|\*|\?)', pattern)

    regex = "^" if pattern.startswith("/") else ".*"
    for position, token in enumerate(tokens):
        if position % 2 == 0:
            regex += re.escape(token)
        else:
            regex += wildcards[token]
    regex += "$"
    return re.compile(regex)
class FileProgress:
    """ Iterable wrapper around an opened file that shows a progress bar while
        the file is read in chunks.

        The total length is determined once on creation and stored in `len`.
    """

    def __init__(self, fp, desc=None):
        from requests.utils import super_len

        self.fp = fp
        self.desc = desc
        self.len = super_len(fp)

    def __iter__(self):
        from tqdm import tqdm

        chunk_size = 1024 * 64
        bar_options = dict(total=self.len, unit='b', unit_scale=True, unit_divisor=1024,
                           dynamic_ncols=True, leave=False)
        with tqdm(**bar_options) as bar:
            read_chunk = self.fp.read
            advance = bar.update
            while True:
                data = read_chunk(chunk_size)
                advance(len(data))
                if not data:
                    # Nothing left to read
                    break
                yield data
            bar.close()
class CliError(Exception):
    """ Exception that will cause a clean command-line shutdown without any stack trace.
        The message will still be printed.

        The positional arguments are a message template followed by its format
        arguments. `status` is stored as `return_code` and used as exit status.
    """

    def __init__(self, *args, status=1):
        # super(self, Exception) is not a valid call and raised a TypeError
        # whenever the exception was created.
        super().__init__(*args)
        self.return_code = status
"""
Initialize a cdstar working directory.
Create a config file in the current directory, so it can be found by future invocations of cdstar-cli commands.
Settings not provided as command line arguments are asked for interactively.
If the main --config parameter is set, the configuration is saved at the specified location,
instead of the current working directory.
"""
import configparser
import os
import re
from . import CONFIG_NAMES, register_subcommand, printer
def register():
    """ Register the `init` subcommand and its command line arguments. """
    # The module docstring starts with a line break, so its first line is
    # empty. Strip it first to get the actual summary line.
    doc = (__doc__ or "").strip()
    summary = doc.splitlines()[0] if doc else None
    parser = register_subcommand("init", command, help=summary, description=__doc__, noconfig=True)
    parser.add_argument("--server", help="CDSTAR server URI (usually ends in '/v3')")
    parser.add_argument("--vault", help="Name of the default vault")
    # Only one way to authenticate can be configured.
    grp = parser.add_mutually_exclusive_group()
    grp.add_argument("--token", help="Auth token to use")
    grp.add_argument("--auth", help="Login credentials as a username:password string")
def command(cdstar, vault=None, args=None):
    """ Create a new config file, asking interactively for missing settings.

        The command is registered as `noconfig` and may be called with the
        parsed command line options as its only argument. The three-argument
        form `command(cdstar, vault, args)` is accepted too.

        :param cdstar: Client instance (not used), or the parsed options if
            this is the only argument.
        :param vault: Not used.
        :param args: Parsed command line options.
        :raises ValueError: If the config file already exists.
    """
    if args is None:
        # Called with the parsed options only
        args = cdstar

    def ask(q, default=None, rx=None):
        """ Prompt until the user enters a value that matches `rx` (if set).
            Empty input selects the default, if there is one. """
        while True:
            val = input("{}? [{}] ".format(q, default) if default else "{}? ".format(q)).strip()
            if not val:
                if default:
                    return default
                print("No input. Try again...")
                continue
            if rx and not re.match(rx, val):
                print("This does not look right. Try again...")
                continue
            return val

    target = os.path.abspath(args.config or CONFIG_NAMES[0])
    if os.path.exists(target):
        raise ValueError("Config file '{}' already exists. Modify it with an editor.".format(target))

    server = args.server or ask("Server URI", "https://cdstar.gwdg.de/demo/v3", r"^https?://.*/v3$")
    vault = args.vault or ask("Default vault name", "MyVault", r"^.+$")

    if args.token:
        auth = ("token", args.token)
    elif args.auth:
        auth = ("basic", args.auth)
    elif ask("Auth method (basic or token)", "basic", r"^(basic|token)$") == "basic":
        # NOTE(review): the password is echoed while typing and stored in
        # plain text in the config file.
        auth = ("basic", ask("Username") + ":" + ask("Password"))
    else:
        # This branch was missing, which left `auth` undefined.
        auth = ("token", ask("Token"))

    config = configparser.ConfigParser()
    config["DEFAULT"]["server"] = server
    config["DEFAULT"]["vault"] = vault
    config["DEFAULT"]["auth"] = ':'.join(auth)

    with open(target, 'w') as fp:
        fp.write("# cdstar-cli config. See https://cdstar.gwdg.de/\n")
        config.write(fp)

    printer("Okay!")
    printer("")
    printer("Config file written to: {}", target)
"""
Upload one or more files to an existing archive.
"""
import os
from . import register_subcommand, compile_glob, FileProgress, hbytes, printer, CliError
def register():
    """ Register the `put` subcommand and its command line arguments. """
    # The module docstring starts with a line break, so its first line is
    # empty. Strip it first to get the actual summary line.
    doc = (__doc__ or "").strip()
    summary = doc.splitlines()[0] if doc else None
    parser = register_subcommand("put", command, help=summary, description=__doc__)
    parser.add_argument("-r", "--recursive", action="store_true", help="Upload entire directories")
    parser.add_argument("-a", "--all", action="store_true", help="Include hidden files (skipped by default)")
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help="Do not upload anything, just print what would have been uploaded")
    parser.add_argument("--flat", action="store_true",
                        help="Strip all directory names from the local path and store all files into the root path (e.g. ./path/to/file.txt would be uploaded as /file.txt)")
    parser.add_argument("-x", "--exclude", metavar="GLOB", action="append", help="Exclude files by glob pattern")
    parser.add_argument("-i", "--include", metavar="GLOB", action="append",
                        help="Include files by glob pattern (default: all)")
    parser.add_argument("-p", "--progress", action="store_true", help="Show progress")
    parser.add_argument("archive", help="Archive ID, or 'new' to create a new archive")
    parser.add_argument("file", nargs='+', help="File(s) (or directories) to upload")
    parser.set_defaults(cmd=command)
def findfiles(flist, recursive=False, include_hidden=False):
    """ Yield the paths of all files named by a list of files and directories.

        :param flist: Paths of files or directories.
        :param recursive: If true, directories are walked and all files below
            them are yielded. Otherwise a directory is an error.
        :param include_hidden: If false, files and directories whose name
            starts with '.' are skipped while walking a directory. Entries
            listed in `flist` directly are never skipped.
        :raises CliError: For entries that are neither a file nor (in
            recursive mode) a directory.
    """
    def visible(name):
        if include_hidden:
            return True
        return not name.startswith('.')

    for path in flist:
        if os.path.isfile(path):
            yield path
            continue
        if not (os.path.isdir(path) and recursive):
            raise CliError("Not a file: " + path)
        for folder, subfolders, filenames in os.walk(path):
            # Pruning in place keeps os.walk from descending into hidden directories.
            subfolders[:] = [name for name in subfolders if visible(name)]
            filenames[:] = [name for name in filenames if visible(name)]
            for filename in filenames:
                yield os.path.join(folder, filename)
def command(cdstar, vault, args):
    """ Upload files to an archive, or print what would be uploaded (dry run).

        Files are collected from `args.file` and filtered by the include and
        exclude rules. Each file is uploaded with its own request, all within
        one transaction that is committed at the end.

        :param cdstar: Client instance to upload with.
        :param vault: Name of the vault.
        :param args: Parsed command line options.
        :raises ValueError: If two different local files map to the same target.
    """
    archive = args.archive
    inc = [compile_glob(rule).match for rule in args.include or []]
    exc = [compile_glob(rule).match for rule in args.exclude or []]
    progress = args.progress

    # Maps the target name within the archive to (local file, stat result)
    uploads = {}
    total = 0

    for file in findfiles(args.file, args.recursive, args.all):
        if inc and not any(rule(file) for rule in inc):
            printer.vv("Skipping: {} (not included)", file)
            continue
        if any(rule(file) for rule in exc):
            printer.vv("Skipping: {} (excluded)", file)
            continue

        # Target names always use '/' as separator, even if the local
        # platform uses something else.
        target = os.path.join("/", os.path.relpath(file)).replace(os.sep, "/")
        if args.flat:
            target = "/" + os.path.basename(target)
        if target in uploads and uploads[target][0] != file:
            raise ValueError("File included twice: {} and {} both map to {}".format(file, uploads[target][0], target))

        stat = os.stat(file)
        total += stat.st_size
        uploads[target] = (file, stat)

    if args.dry_run:
        for target in sorted(uploads):
            print("{}".format(target))
        printer("\nWould upload {} files ({}) to {}",
                len(uploads), hbytes(total), "new archive" if archive == 'new' else "archive: " + vault + "/" + archive)
        return

    with cdstar.begin(autocommit=True):
        if archive == 'new':
            archive = cdstar.create_archive(vault)['id']
            printer("Uploading {} files ({}) to new archive: {}/{}", len(uploads), hbytes(total), vault, archive)
        else:
            printer("Uploading {} files ({}) to archive: {}/{}", len(uploads), hbytes(total), vault, archive)

        for i, target in enumerate(sorted(uploads)):
            file, stat = uploads[target]
            with open(file, 'rb') as fp:
                printer("[{}/{}] {}", i + 1, len(uploads), target)
                if progress and not printer.quiet:
                    fp = FileProgress(fp, printer.file)
                cdstar.put_file(vault, archive, target, fp)

    printer("Done")
# `install_requires` is a setuptools feature that plain distutils ignores,
# and distutils is no longer part of the standard library since Python 3.12.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

import pycdstar


def read(name):
    """ Return the stripped content of a text file. """
    with open(name, 'r') as fp:
        return fp.read().strip()


setup(
    name='pycdstar',
    version=pycdstar.__version__,
    author='Marcel Hellkamp',
    author_email='marc@gsites.de',
    # Sub-packages are not included automatically and must be listed, too.
    packages=['pycdstar', 'pycdstar.cli'],
    license='LICENSE.txt',
    description="Library and command-line client to access CDSTAR (3.0+)",
    long_description=read('README.md'),
    install_requires=read('requirements.txt').splitlines(),
)