Commit 5ad25cea authored by Marcel Hellkamp's avatar Marcel Hellkamp
Browse files

Prototyping

parent 88273d3e
......@@ -15,19 +15,30 @@ and newer.
`cdstar3` is a command-line toolbox to upload, download or manage data in a CDSTAR repository.
Please note that `cdstar3` was designed to be used by humans, not scripts. The output is mostly human-friendly and may
change between releases. If you want to automate CDSTAR, consider implementing your tools using the `pycdstar3` client library, or directly against the stable CDSTAR REST API.
Please note that `cdstar3` was designed to be used by humans, not scripts. The output is mostly human-friendly and may change between releases. If you want to automate CDSTAR, consider implementing your tools using the `pycdstar3` client library, or directly against the stable CDSTAR REST API.
### Configuration
The `cdstar3` client needs to know which server to connect to and which vault to use by default. This information (and more) is defined in a file named `cdstar.conf`. If no such file is specified via the `-c` parameter, `cdstar3` will look for a `cdstar.conf` in the current working directory, and then in any of its parent directories. As a last resort, certain system-dependent user folders are searched (e.g. `~/.config/cdstar3/` on Linux). You can create a configuration file with `cdstar3 init`.
### Target Strings
A single CDSTAR server might host multiple vaults, each containing any number of archives, each containing multiple files. These can be referenced using their full resource URI (e.g. `http(s)://$server/v3/$vault/$archive/$file`), but that would be a lot of typing if you are mostly working with a single vault at a time. For this reason, a default server and vault can be configured in your `cdstar.conf`, and the target strings will get a lot shorter: Archives can be referenced by their ID alone, and files by `$archive/$file`. If you want to specify a different vault, you can use the slightly longer `/$vault/$archive` or `/$vault/$archive/$file` forms. Notice the leading `/` if you specify a vault. Even with a `cdstar.conf` present, you can still use the full URI if needed.
### Usage
This is an (incomplete) list of commands and their most important parameters. For a complete list, run `cdstar3 -h` and for details, see `cdstar3 COMMAND -h`.
* **`init`**: Ask for server address, vault, credentials and other config options and create a `cdstar.conf` file in the current directory.
* **`put ID [path]`**: Upload one or more files or folders to an archive.
* **`get ID/FILE (path)`**: Download a single file. If no path is specified, it is streamed to stdout.
* **`info ID[/FILE]`**: Get information about an archive or file.
* **`meta get ID[/FILE] (FIELD)`**: Get metadata about an archive or file.
* **`meta set ID[/FILE] [FIELD=VALUE]`**: Set metadata for an archive or file.
###########################################
High-level commands work with local archive directories (one per archive). When creating a new archive or recovering an existing archive for the first time, `cdstar3` will create a hidden `.cdstar` folder within the target directory and remember the exact location (server, vault and id) of the remote archive. Do NOT delete this folder, or the correlation between your local copy and the remote archive is lost.
* **`archive DIR`**: Create a new remote archive from a local directory.
......
......@@ -17,6 +17,7 @@ class CDStar:
def __init__(self, url, auth=None, _session=None):
self.url = url.rstrip("/") + '/'
self.auth = auth
self.headers = {}
self._session = _session or requests.Session()
self._tx = None
......@@ -32,8 +33,8 @@ class CDStar:
if self.auth:
options['auth'] = self.auth
if self._tx:
options.setdefault("headers", {})["X-Transaction"] = self._tx['id']
if self.headers:
options.setdefault("headers", {}).update(self.headers)
rs = self._session.request(
method, self.url + '/'.join(path), **options)
......@@ -43,38 +44,15 @@ class CDStar:
# TODO: handle errors
raise Exception(rs.text)
def begin(self, autocommit=False):
""" Begin a new transaction, return self """
if self._tx:
self.rollback()
self._tx = self._rest("POST", "_tx").json()
self._tx['x-autocommit'] = autocommit
return self
def commit(self):
if not self._tx:
raise RuntimeError("No transaction running")
self._rest("POST", "_tx", self._tx['id'])
self._tx = None
def rollback(self):
""" Rollback the current transaction.
Do nothing if no transaction is active.
"""
if self._tx:
self._rest("DELETE", "_tx", self._tx['id'])
self._tx = None
def __enter__(self):
return self
@property
def tx(self):
return self._tx
def __exit__(self, exc_type, exc_value, traceback):
def begin(self, autocommit=False, readonly=True):
if self._tx:
if exc_type is None and self._tx['x-autocommit']:
self.commit()
else:
self.rollback()
self._tx.rollback()
self._tx = TxHandle(self, autocommit=autocommit, readonly=readonly)
return self._tx
def service_info(self):
return self._rest("GET").json()
......@@ -100,6 +78,59 @@ class CDStar:
)
class TxHandle:
    """ Handle for a single server-side transaction of a client.

    The handle is a context manager: entering it starts a new transaction
    (``POST _tx``) and registers the handle as ``client._tx``; leaving it
    commits (only if no exception occurred and ``autocommit`` is true) or
    rolls back, and unregisters the handle from the client again.

    The handle is falsy while no transaction is running and supports
    ``handle['id']`` style lookups, so client code that treats ``client._tx``
    like the raw transaction mapping keeps working.

    :param client: Client object providing ``_rest(method, *path, **options)``
        and a ``_tx`` attribute.
    :param autocommit: Commit on a clean ``with`` block exit instead of
        rolling back.
    :param readonly: Sent as ``readonly`` form value when the transaction is
        created.
    """

    def __init__(self, client, autocommit=False, readonly=True):
        self.client = client
        self.autocommit = autocommit
        self.readonly = readonly
        #: Transaction info as returned by the server, or None if not running.
        self._tx = None

    def __bool__(self):
        # A handle that is not (or no longer) running must not be mistaken
        # for an active transaction by `if client._tx:` style checks.
        return self.is_running

    def __getitem__(self, key):
        """ Return a field (e.g. ``'id'``) of the running transaction. """
        if not self._tx:
            raise RuntimeError("No transaction running")
        return self._tx[key]

    def __enter__(self):
        previous = self.client._tx
        if previous is not None:
            # rollback() is a no-op for handles that are not running.
            previous.rollback()
            # Unregister the old handle *before* talking to the server again,
            # so the create-request is not associated with a dead transaction.
            self.client._tx = None
        self._tx = self.client._rest("POST", "_tx", data={"readonly": self.readonly}).json()
        self._tx['x-autocommit'] = self.autocommit
        self.client._tx = self
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self._tx and self.client._tx is self:
            try:
                if exc_type is None and self._tx['x-autocommit']:
                    self.commit()
                else:
                    self.rollback()
            finally:
                # Never leave the client bound to this handle, even if the
                # commit or rollback request itself failed.
                self.client._tx = None

    @property
    def id(self):
        """ ID of the running transaction.

        :raises RuntimeError: if no transaction is running.
        """
        if not self._tx:
            raise RuntimeError("No transaction running")
        return self._tx['id']

    @property
    def is_running(self):
        """ True while this handle holds transaction info from the server. """
        return bool(self._tx)

    def renew(self):
        """ Issue a ``GET _tx/<id>`` request for the running transaction.

        :raises RuntimeError: if no transaction is running.
        """
        if not self._tx:
            raise RuntimeError("No transaction running")
        self.client._rest("GET", "_tx", self._tx['id'])

    def commit(self):
        """ Commit the running transaction (``POST _tx/<id>``).

        :raises RuntimeError: if no transaction is running.
        """
        if not self._tx:
            raise RuntimeError("No transaction running")
        self.client._rest("POST", "_tx", self._tx['id'])
        self._tx = None

    def rollback(self):
        """ Rollback the current transaction.

        Do nothing if no transaction is active.
        """
        if self._tx:
            self.client._rest("DELETE", "_tx", self._tx['id'])
            self._tx = None
class PostUpdate:
""" Builder for CDSTAR POST multipart/form-data requests to upload multiple files or change aspects of an archive.
"""
......
......@@ -4,10 +4,6 @@ import importlib
import os
import re
import sys
from typing import Tuple, Dict
from .context import CliContext
from ..cdstar import CDStar
__ALL__ = ["main", "register_subcommand", "printer"]
......@@ -89,7 +85,7 @@ class Printer:
import traceback
traceback.print_exc(file=self.file)
else:
self("Stacktrace not shown. Add -v to print a full stacktrace.")
self("Stacktrace not shown. Add -vv to print a full stacktrace.")
#: This should be used to print optional messages to the user.
......@@ -112,7 +108,7 @@ def main(args=None):
printer.set_verbosity(opts.verbose)
if opts.C != ".":
printer.v("Changing working directory to:", opts.C)
printer.v("Changing working directory to: {}", opts.C)
os.chdir(opts.C)
if not hasattr(opts, "subcommand"):
......@@ -124,6 +120,8 @@ def main(args=None):
try:
ctx = CliContext(workdir=".")
if opts.config:
ctx.load_config(opts.config)
return cmdmain(ctx, opts) or 0
except KeyboardInterrupt:
printer("Exiting...")
......@@ -132,13 +130,10 @@ def main(args=None):
printer.fatal(*e.args)
return e.return_code
except Exception as e:
printer.fatal("Uncaught exception. Exiting...")
printer.fatal("Uncaught exception ({}). Exiting...", e)
return 1
class CliError(Exception):
""" Exception that will cause a clean command-line shutdown without any stack trace.
......@@ -147,3 +142,5 @@ class CliError(Exception):
def __init__(self, *args, status=1):
super().__init__(*args)
self.return_code = status
from .context import CliContext
......@@ -12,8 +12,8 @@ import configparser
import os
import re
from cli import CONFIG_NAMES, register_subcommand, printer
from .. import register_subcommand, printer
from ..context import CONFIG_NAMES
def register():
parser = register_subcommand("init", command, help=__doc__.splitlines()[0], description=__doc__, noconfig=True)
......
......@@ -3,36 +3,37 @@ Upload one or more files to an existing archive.
"""
import os
from cli import register_subcommand, compile_glob, FileProgress, hbytes, printer, CliError
from ..context import VerboseTx
from .. import register_subcommand, printer, CliError
from .._utils import compile_glob, hbytes
def register():
parser = register_subcommand("put", command, help=__doc__.splitlines()[0], description=__doc__)
parser.add_argument("-r", "--recursive", action="store_true", help="Upload entire directories")
parser.add_argument("-a", "--all", action="store_true", help="Include hidden files (skipped by default)")
parser.add_argument("-n", "--dry-run", action="store_true",
help="Do not upload anything, just print what would have been uploaded")
parser.add_argument("--flat", action="store_true",
help="Strip all directory names from the local path and store all files into the root path (e.g. ./path/to/file.txt would be uploaded as /file.txt)")
parser.add_argument("-x", "--exclude", metavar="GLOB", action="append", help="Exclude files by glob pattern")
parser.add_argument("-x", "--exclude", metavar="GLOB", action="append",
help="Exclude files by glob pattern")
parser.add_argument("-i", "--include", metavar="GLOB", action="append",
help="Include files by glob pattern (default: all)")
parser.add_argument("-p", "--progress", action="store_true",
help="Show progress bar for large files or slow uploads")
parser.add_argument("archive", help="Archive ID, or 'new' to create a new archive")
parser.add_argument("file", nargs='+', help="File(s) (or directories) to upload")
parser.add_argument("TARGET", help="Archive ID, or 'new' to create a new archive")
parser.add_argument("PATH", nargs='+', help="Files or directories to upload")
parser.set_defaults(cmd=command)
def findfiles(flist, recursive=False, include_hidden=False):
def findfiles(flist, include_hidden=False):
def keep(name):
return include_hidden or not name.startswith('.')
for entry in flist:
if os.path.isfile(entry):
yield entry
elif os.path.isdir(entry) and recursive:
elif os.path.isdir(entry):
for (root, dirs, files) in os.walk(entry):
dirs[:] = filter(keep, dirs)
files[:] = filter(keep, files)
......@@ -43,15 +44,17 @@ def findfiles(flist, recursive=False, include_hidden=False):
def command(ctx, args):
archive = args.archive
vault = ctx.default_vault
client, vault, archive, rfile = ctx.resolve(args.TARGET)
inc = [compile_glob(rule).match for rule in args.include or []]
exc = [compile_glob(rule).match for rule in args.exclude or []]
progress = args.progress
if not rfile:
rfile = '/'
uploads = {}
total = 0
files = findfiles(args.file, args.recursive, args.all)
files = findfiles(args.PATH, args.all)
for file in files:
if inc and not any(rule(file) for rule in inc):
printer.vv("Skipping: {} (not included)", file)
......@@ -60,9 +63,16 @@ def command(ctx, args):
printer.vv("Skipping: {} (excluded)", file)
continue
target = os.path.join("/", os.path.relpath(file))
target = os.path.relpath(file)
if args.flat:
target = "/" + os.path.basename(target)
target = os.path.basename(target)
if rfile.endswith("/"):
target = os.path.join(rfile, target)
else:
target = rfile
if target != os.path.normpath(target):
raise ValueError("Unable to create remote file with relative or empty path segments: {}".format(target))
if target in uploads and uploads[target][0] != file:
raise ValueError("File included twice: {} and {} both map to {}".format(file, uploads[target][0], target))
stat = os.stat(file)
......@@ -76,9 +86,9 @@ def command(ctx, args):
len(uploads), hbytes(total), "new archive" if archive == 'new' else "archive: " + vault + "/" + archive)
return
with ctx.client.begin(autocommit=True) as ctx:
with VerboseTx(client.begin(autocommit=True)) as tx:
if archive == 'new':
archive = ctx.client.create_archive(vault)['id']
archive = client.create_archive(vault)['id']
printer("Uploading {} files ({}) to new archive: {}/{}", len(uploads), hbytes(total), vault, archive)
else:
printer("Uploading {} files ({}) to archive: {}/{}", len(uploads), hbytes(total), vault, archive)
......@@ -90,17 +100,22 @@ def command(ctx, args):
pbar = tqdm(total=total, unit='b', unit_scale=True, unit_divisor=1024, dynamic_ncols=True,
file=printer.file)
for i, target in enumerate(sorted(uploads)):
file, stat = uploads[target]
with open(file, 'rb') as fp:
line = "[{}/{}] {}".format(i + 1, len(uploads), target[1:])
if pbar:
pbar.write(line)
read = fp.read
chunks = iter(lambda: read(1024*8), b'')
chunks = (chunk for chunk in chunks if not pbar.update(len(chunk)))
fp = chunks
else:
printer(line)
ctx.client.put_file(vault, archive, target, fp)
try:
for i, target in enumerate(sorted(uploads)):
file, stat = uploads[target]
with open(file, 'rb') as fp:
line = "[{}/{}] {}".format(i + 1, len(uploads), target[1:])
if pbar:
pbar.write(line)
read = fp.read
chunks = iter(lambda: read(1024*8), b'')
chunks = (chunk for chunk in chunks if not pbar.update(len(chunk)))
fp = chunks
else:
printer(line)
client.put_file(vault, archive, target, fp)
finally:
if pbar:
pbar.close()
printer("Done")
import os
import urllib.parse
from contextlib import contextmanager
from . import printer, CliError
from ._utils import walk_up
......@@ -13,50 +14,103 @@ class CliContext:
""" Search for config files and archive directory in or above the current working directory. """
def __init__(self, workdir="."):
self.connected = {}
self.workdir = os.path.abspath(workdir)
self.archive_root = None
self.config = None
self._client = None
self._config = None
home = os.path.abspath(os.path.expanduser("~"))
# Load the configuration from an explicit file name. The resulting Config
# object is stored in self._config.
def load_config(self, fname):
self._config = Config(fname)
# Lazily resolved configuration: if none is loaded yet, _find_config() is
# asked to locate one; if there still is none afterwards, a CliError is raised.
@property
def config(self):
if not self._config:
self._find_config()
if not self._config:
raise CliError("No config found (see `init` command)")
return self._config
def _find_config(self):
for path in walk_up(self.workdir):
if not self.archive_root:
if os.path.exists(os.path.join(path, METADIR_NAME)):
printer.vv("Found archive directory: {}", path)
self.archive_root = path
if not self.config:
for name in CONFIG_NAMES:
cfile = os.path.join(path, name)
if os.path.exists(cfile):
printer.vv("Found config file: {}", cfile)
self.config = Config(cfile)
break
if path == home:
for name in CONFIG_NAMES:
cfile = os.path.join(path, name)
if os.path.exists(cfile):
printer.vv("Found config file: {}", cfile)
self._config = Config(cfile)
return
# Locate the archive directory: the first path yielded by walk_up(self.workdir)
# that contains an entry named METADIR_NAME is stored in self.archive_root.
# NOTE(review): presumably walk_up yields workdir and then each parent
# directory - confirm against _utils.walk_up.
def _find_root(self):
for path in walk_up(self.workdir):
if os.path.exists(os.path.join(path, METADIR_NAME)):
printer.vv("Found archive directory: {}", path)
self.archive_root = path
# Stop at the first match.
return
def resolve(self, ref):
""" Return (client, vault, archive, file) from a reference string
a(/f) -> defaultServer, defaultVault, a, (/f)
/v(/a(/f)) -> defaultServer, v, (a), (/f)
http(s)://server.tpl/v3/v(/a(/f)) -> server, v, (a), (/f)

Parts in parentheses are optional; missing parts are returned as empty
strings. A returned file name keeps its leading slash.

Raises ValueError if a full URL contains no "/v3/" part, or if the vault
name is empty.
"""
# Keep the unmodified input for error messages; `ref` is consumed below.
oref = ref
if ref.startswith("http://") or ref.startswith("https://"):
# Split at the first "/v3/": everything up to and including it is the
# server base URL, the remainder is vault/archive/file.
url, v3, rest = ref.partition("/v3/")
if not v3: raise ValueError("Not a CDSTAR url: " + oref)
client = self.connect(url + v3)
# Re-add a leading slash so the remainder is parsed like the "/v/a/f" form.
ref = '/' + rest
else:
client = self.connect(self.config['server'])
if ref.startswith('/'):
# Leading slash: the first path segment names the vault.
vault, _, ref = ref[1:].partition('/')
if not vault:
raise ValueError("Not a valid target: " + oref)
else:
vault = self.config['vault']
# Everything up to the first slash is the archive; the rest (if any) is the
# file name.
archive, slash, file = ref.partition("/")
if slash:
file = slash + file
return client, vault, archive, file
# Return a CDStar client for the given server URL. Clients are cached in
# self.connected, keyed by the exact server string, so repeated calls with the
# same string return the same client object.
def connect(self, server):
if server not in self.connected:
url = urllib.parse.urlsplit(server)
if url.username:
# Credentials embedded in the URL: use the password if present, otherwise
# fall back to self._ask_pass(url).
auth = (url.username, url.password or self._ask_pass(url))
# The server string is passed on unchanged (still including the credentials).
client = CDStar(server, auth=auth)
else:
client = CDStar(server)
self.connected[server] = client
return self.connected[server]
@property
def default_vault(self):
if not self.config or 'vault' not in self.config:
if 'vault' not in self.config:
raise CliError("No default vault configured")
return self.config['vault']
@property
def client(self):
if self._client:
return self._client
if not self.config:
raise CliError("Unable to connect: No config found (see `init` command)")
server = self.config["server"]
url = urllib.parse.urlparse(server)
if url.username:
auth = (url.username, url.password or self._ask_pass())
self._client = CDStar(server, auth=auth)
else:
self._client = CDStar(server)
return self._client
# Placeholder for an interactive password prompt; currently always raises
# RuntimeError. The `url` argument is not used yet.
def _ask_pass(self, url):
# Enter password for {url.scheme}://{url.netloc}/{url.path} (user={url.username})
raise RuntimeError("Asking for password not implemented yet")
class VerboseTx:
def __init__(self, tx):
self.tx = tx
def __enter__(self):
tx = self.tx.__enter__()
printer.v("Transaction started: {}", self.tx.id)
return tx
def _ask_pass(self):
raise RuntimeError("Asking for passwort not implemented yet")
def __exit__(self, exc_type, exc_val, exc_tb):
id = self.tx.id
self.tx.__exit__(exc_type, exc_val, exc_tb)
if exc_type:
printer("Transaction rolled back: {}", id)
class ArchiveDir:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment