Commit f537e417 authored by parciak's avatar parciak
Browse files

Merge branch 'parciak-add-liquid-templating' into 'master'

Version 0.4.0

See merge request !5
parents 13f161aa a0648a46
Pipeline #175562 passed with stages
in 1 minute and 20 seconds
......@@ -3,6 +3,7 @@
# SPDX-License-Identifier: GPL-3.0-or-later
stages:
- build
- test
- publish
......@@ -13,30 +14,29 @@ variables:
COUCH_DB: "annotation_agent_test"
build_image:
stage: test
stage: build
tags:
- shell
- medic
- umg
before_script:
- docker info
- docker login --username $CI_REGISTRY_USER --password $CI_REGISTRY_PASSWORD $CI_REGISTRY
script:
- docker build -t $CI_REGISTRY_IMAGE:latest .
- docker build -t $CI_REGISTRY_IMAGE:development .
- docker push $CI_REGISTRY_IMAGE:development
run_tests:
image: $CI_REGISTRY_IMAGE:development
stage: test
tags:
- shell
- docker
- medic
- umg
before_script:
- curl --connect-timeout 5 $CDSTAR_URI
- curl --connect-timeout 5 $COUCH_URI
- python3 -m pip install --upgrade pip
- python3 -m pip install -r requirements.txt
script:
- python3 -m pytest --junitxml=report.xml
......
......@@ -15,11 +15,10 @@ pipenv-to-requirements = "*"
[packages]
cloudant = "*"
sqlalchemy = "*"
alembic = "*"
pytest = "*"
pycdstar3 = {git = "https://gitlab.gwdg.de/cdstar/pycdstar3"}
jinja2 = "*"
python-liquid = "*"
[requires]
python_version = "3.8"
......
This diff is collapsed.
......@@ -97,6 +97,41 @@ Full response:
}
```
#### Liquid Templating
You may use Liquid templates in the options in order to pull values from an input message. Let's assume an agent produces a message output like this:
```
{
"cdstar_archiv": "a1b2c4d4e5f6",
"archiv_metadaten": { },
"datei_metadaten": {
"name": "SAP Einwilligungen Export aus Onkostar"
}
}
```
You can refer to any value using the dot notation and put it into your options:
```
{
"archive_id": "{{ cdstar_archiv }}",
"vault_id": "medic",
"couch_db": "medic",
"annotations_archive": {
"name": "{{ datei_metadaten.name }}",
"abstract": "An export of patient consent data from SAP.",
"sourceOrganization": {
"@id": "https://medic.umg.eu/metadata/Organization#umg",
"@type": "Organization"
}
},
"annotations_file": {}
}
```
See the [Liquid documentation](https://shopify.github.io/liquid/) for more information. Filters are also enabled and can be used, but they are not explicitly tested. Note that you have to refer to specific values; do not refer to subtrees. See `annotations_archive.name` in the example above: the value at `datei_metadaten.name` is inserted within double quotes, so it has to evaluate to a string.
### Check
Checks the status of annotations in progress. The request has to supply the archives via the memory, which is the only variable that matters during a check request.
......
......@@ -2,6 +2,6 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
VERSION = (0, 3, 2)
VERSION = (0, 4, 0)
__version__ = ".".join(map(str, VERSION))
......@@ -3,8 +3,11 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import datetime
import json
import os
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, List, Optional, Union
from liquid.context import Template
from annotator import access_log, error_log
from annotator import config
......@@ -17,6 +20,8 @@ from fastapi import BackgroundTasks, FastAPI, Request
import fastapi
import fastapi.responses
from liquid import Environment
app = FastAPI()
endpoint_name = "annotator"
......@@ -103,6 +108,19 @@ def receive(payload: awmodels.RequestReceive, background_tasks: BackgroundTasks)
response = awmodels.ResponseReceive()
response.result.memory.archives += payload.params.memory.archives
# create a liquid templating environment and render the options as a JSON
env = Environment()
template = env.from_string(
payload.params.options.json(exclude_none=True, exclude_unset=True)
)
rendered_options = json.loads(template.render(payload.params.message.payload))
payload.params.message.payload = awmodels.PayloadInput.parse_obj(
payload.params.message.payload
)
for k, v in rendered_options.items():
if v:
payload.params.message.payload.__setattr__(k, v)
archive_id = payload.params.message.payload.archive_id
settings = {}
for key in [
......
......@@ -3,7 +3,7 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import abc
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import BaseModel, Field, validator
......@@ -91,7 +91,7 @@ class PayloadInput(BaseModel):
This model represents the expected input payload when a message is received via `receive`.
"""
archive_id: str = Field(..., example="a1b2c3d4e5f6")
archive_id: str = Field(None, example="a1b2c3d4e5f6")
vault_id: str = Field(None, example="medic")
cdstar_uri: str = Field(None, example="http://localhost:8082/v3")
cdstar_user: str = Field(None, example="someuser")
......@@ -149,12 +149,13 @@ class OptionsCommon(BaseModel):
This model represents expected options to configure this agent (or rather a call to this agent).
"""
archive_id: str = Field(None, example="{{ archive_id_key }}")
vault_id: str = Field(None, example="medic")
cdstar_uri: str = Field(None, example="http://localhost:8082/v3")
cdstar_user: str = Field(None, example="someuser")
cdstar_pass: str = Field(None, example="somepass")
couch_db: str = Field(None, example="medic")
couch_uri: str = Field(None, example="http://lcoalhost:5984")
couch_uri: str = Field(None, example="http://localhost:5984")
couch_user: str = Field(None, example="someuser")
couch_pass: str = Field(None, example="somepass")
api_key: str = Field("", example="")
......@@ -179,9 +180,7 @@ class MemoryCommon(BaseModel):
This model represents the expected memory content to communicate state of the agent.
"""
archives: List[Tuple[str, str]] = Field([], example=[(
"medic", "a1b2c3d4e5f6"
)])
archives: List[Tuple[str, str]] = Field([], example=[("medic", "a1b2c3d4e5f6")])
@staticmethod
def example() -> dict:
......@@ -235,7 +234,7 @@ class ResponseRegister(ResponseCommon):
class MessageInputReceive(BaseModel):
payload: PayloadInput
payload: Union[Dict[str, Any], PayloadInput] # Union[PayloadInput, Dict[str, Any]]
class ParamsReceive(ParamsCommon):
......
......@@ -6,6 +6,8 @@ import datetime
import io
import json
import os
import random
import string
import tempfile
import time
from typing import Any, Dict, Optional
......@@ -23,7 +25,7 @@ from annotator import test_utils
client = TestClient(app)
generated_files_count = 250
generated_files_count = 25 # 0
file_list_limit = 50
......@@ -60,11 +62,13 @@ def delete_metadata(vault_id: str, archive_id: str):
user_basic_auth=True,
) as client:
db: CloudantDatabase.CouchDatabase = client[config.BasicSettings().couch_db]
wait_iterations = 3
wait_iterations = 1
while wait_iterations > 0 and archive_id not in db:
# wait for receive to finish in order to ensure no dead docs are created.
time.sleep(3)
wait_iterations -= 1
if archive_id not in db:
return
archive: CloudantDocument.Document = db[archive_id]
for part in archive["hasPart"]:
f: CloudantDocument.Document = db[part["@id"].split("/")[-1]]
......@@ -91,7 +95,6 @@ def test_invalid_request_02():
def post_valid_receive_request(
cdstar_archive: str,
annotation: Dict[str, Any] = {},
annot_archive: Dict[str, Any] = {},
annot_file: Dict[str, Any] = {},
) -> Dict[str, Any]:
......@@ -108,7 +111,7 @@ def post_valid_receive_request(
"annotations_file": annot_file,
}
},
"options": {"annotation": annotation},
"options": {},
"memory": {},
"credentials": [],
},
......@@ -121,6 +124,63 @@ def post_valid_receive_request(
return json
def _unused_random_key(length: int, taken: Dict[str, Any]) -> str:
    """Return a random ASCII-letter key of ``length`` characters that is not a key of ``taken``."""
    candidate = "".join(random.choices(string.ascii_letters, k=length))
    while candidate in taken:
        candidate = "".join(random.choices(string.ascii_letters, k=length))
    return candidate


def post_valid_liquid_request(
    cdstar_archive: str,
    annot_archive: Optional[Dict[str, Any]] = None,
    annot_file: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    This method is used to create requests analogous to post_valid_receive_request while using
    liquid templating.

    The archive id and every annotation value are placed into the message payload under
    randomly generated keys; the options only contain Liquid placeholders that point at
    those keys, so the values must be resolved through template rendering.

    Args:
        cdstar_archive: Id of the archive, stored in the message under a random 8-letter key.
        annot_archive: Archive annotations (name -> value); values are stored in the message
            under ``archiv.<random key>``. Defaults to no annotations.
        annot_file: File annotations (name -> value); values are stored in the message
            under ``datei.<random key>``. Defaults to no annotations.

    Returns:
        The parsed JSON body of the response.

    Raises:
        AssertionError: If the status code is not 2xx, the body is empty, or
            ``test_utils.is_valid_response`` rejects the body.
    """
    # Use None as the default instead of a shared mutable dict.
    annot_archive = {} if annot_archive is None else annot_archive
    annot_file = {} if annot_file is None else annot_file

    mock_message: Dict[str, Any] = {"archiv": {}, "datei": {}}
    cdstar_key = "".join(random.choices(string.ascii_letters, k=8))
    mock_message[cdstar_key] = cdstar_archive

    # create a set of random keys for archive annotations (random key -> annotation name)
    annot_archive_keys: Dict[str, str] = {}
    for name, value in annot_archive.items():
        random_key = _unused_random_key(10, annot_archive_keys)
        annot_archive_keys[random_key] = name
        mock_message["archiv"][random_key] = value

    # create a set of random keys for file annotations (random key -> annotation name)
    annot_file_keys: Dict[str, str] = {}
    for name, value in annot_file.items():
        random_key = _unused_random_key(12, annot_file_keys)
        annot_file_keys[random_key] = name
        mock_message["datei"][random_key] = value

    receive_request = {
        "method": "receive",
        "params": {
            "message": {"payload": mock_message},
            "options": {
                "archive_id": "{{ " + cdstar_key + " }}",
                "annotations_archive": {
                    name: "{{ archiv." + key + " }}"
                    for key, name in annot_archive_keys.items()
                },
                "annotations_file": {
                    name: "{{ datei." + key + " }}"
                    for key, name in annot_file_keys.items()
                },
            },
            "memory": {},
            "credentials": [],
        },
    }

    response = client.post(f"/{endpoint_name}", json=receive_request)
    assert 200 <= response.status_code <= 299
    # Named response_json so the imported json module is not shadowed.
    response_json = response.json()
    assert response_json
    assert test_utils.is_valid_response(response_json)
    return response_json
def test_receive_01(cdstar_archive):
"""
Tests if a JSON-LD using Schema.org is uploaded to CouchDB.
......@@ -329,7 +389,7 @@ def test_receive_06(cdstar_archive):
"used_job_version": "1.0",
}
response_data = post_valid_receive_request(
cdstar_archive[1], annotation={}, annot_archive=annot_archive, annot_file={}
cdstar_archive[1], annot_archive=annot_archive, annot_file={}
)
meta_json = test_utils.get_json_of_uri(
......@@ -365,7 +425,7 @@ def test_receive_07(cdstar_archive):
"abstract": "This is a dummy file, please do not do any science with it!",
}
response_data = post_valid_receive_request(
cdstar_archive[1], annotation={}, annot_archive={}, annot_file=annot_file
cdstar_archive[1], annot_archive={}, annot_file=annot_file
)
meta_json = test_utils.get_json_of_uri(
......@@ -409,3 +469,164 @@ def test_receive_invalid_archive_01():
assert len(response_data["result"]["errors"]) >= 1
assert len(response_data["result"]["logs"]) == 0
assert len(response_data["result"]["messages"]) == 0
def test_receive_08(cdstar_archive):
    """
    Tests if a JSON-LD using Schema.org is uploaded to CouchDB using Liquid Templating.
    """
    vault_id = cdstar_archive[0]
    archive_id = cdstar_archive[1]

    post_valid_liquid_request(archive_id)

    # If the metafile is missing there is nothing to clean up, so this check stays
    # outside of the try/finally below.
    assert os.path.exists(
        os.path.join(
            tempfile.gettempdir(),
            f"meta_{vault_id}_{archive_id}.json",
        )
    )

    try:
        settings = config.BasicSettings()
        # external request: use requests directly instead of the TestClient
        meta_json = test_utils.get_json_of_uri(
            f"{settings.couch_uri}/{settings.couch_db}/{archive_id}",
            auth=(settings.couch_user, settings.couch_pass),
        )

        # assert that the response is a JSON-LD of schema.org/Dataset
        assert test_utils.is_schemaorg_jsonld(meta_json)
        assert meta_json["@type"] == "Dataset"
        assert meta_json["identifier"] == archive_id
    finally:
        # Always clean up, even when one of the assertions above fails.
        test_utils.cleanup_metafile(vault_id, archive_id)
def _parse_offset_timestamp(timestamp: str) -> datetime.datetime:
    """Parse an ISO-like timestamp after removing the colon from its trailing UTC offset."""
    # strptime's %z only accepts a colon inside the UTC offset from Python 3.7 on, so the
    # colon is stripped from the last four characters to parse identically on all versions.
    normalized = timestamp[:-4] + timestamp[-4:].replace(":", "")
    return datetime.datetime.strptime(normalized, "%Y-%m-%dT%H:%M:%S.%f%z")


def test_receive_09(cdstar_archive):
    """
    Tests if the uploaded archive metadata matches the CDSTAR archive using liquid templating.
    """
    vault_id = cdstar_archive[0]
    archive_id = cdstar_archive[1]

    post_valid_liquid_request(archive_id)

    try:
        settings = config.BasicSettings()
        # external request: use requests directly instead of the TestClient
        cdstar_json = test_utils.get_json_of_uri(
            f"{settings.cdstar_uri}/{vault_id}/{archive_id}",
            auth=(settings.cdstar_user, settings.cdstar_pass),
        )
        # external request: use requests directly instead of the TestClient
        meta_json = test_utils.get_json_of_uri(
            f"{settings.couch_uri}/{settings.couch_db}/{archive_id}",
            auth=(settings.couch_user, settings.couch_pass),
        )

        # Compare both JSON replies and the data they contain
        assert test_utils.is_schemaorg_jsonld(meta_json)
        assert meta_json["identifier"] == archive_id

        assert _parse_offset_timestamp(cdstar_json["created"]) == _parse_offset_timestamp(
            meta_json["dateCreated"]
        )
        assert _parse_offset_timestamp(cdstar_json["modified"]) == _parse_offset_timestamp(
            meta_json["dateModified"]
        )

        assert int(cdstar_json["file_count"]) == int(meta_json["size"]["value"])
        assert int(cdstar_json["file_count"]) == len(meta_json["hasPart"])
    finally:
        # Always clean up, even when one of the assertions above fails.
        test_utils.cleanup_metafile(vault_id, archive_id)
def test_receive_10(cdstar_archive):
    """
    Tests if a receive request yields a memory for checking afterwards using liquid templating.
    """
    vault_id = cdstar_archive[0]
    archive_id = cdstar_archive[1]

    response_data = post_valid_liquid_request(archive_id)
    try:
        assert test_utils.has_memory_archive(response_data)
    finally:
        # Always clean up, even when the assertion above fails.
        test_utils.cleanup_metafile(vault_id, archive_id)
def test_receive_11(cdstar_archive):
    """
    Tests if supplied archive annotations are set to CouchDB accordingly using liquid templating.
    """
    annot_archive = {
        # TODO: adding whole subtrees does not work out of the box.
        # "maintainer": {
        #     "@id": "../meta/marcel.parciak@med.uni-goettingen.de",
        #     "@type": "Person",
        # },
        "used_job": "DummyJob",
        "used_job_version": "1.0",
    }
    # The response itself is already validated inside the helper; only the stored
    # document is inspected here.
    post_valid_liquid_request(
        cdstar_archive[1], annot_archive=annot_archive, annot_file={}
    )

    settings = config.BasicSettings()
    meta_json = test_utils.get_json_of_uri(
        f"{settings.couch_uri}/{settings.couch_db}/{cdstar_archive[1]}",
        auth=(settings.couch_user, settings.couch_pass),
    )
    assert "used_job" in meta_json
    assert meta_json["used_job"] == "DummyJob"
    assert "used_job_version" in meta_json
    assert meta_json["used_job_version"] == "1.0"
def test_receive_12(cdstar_archive):
    """
    Tests if supplied file annotations are set to CouchDB accordingly using liquid templating.
    """
    annot_file = {
        # TODO: adding whole subtrees does not work out of the box
        # "creator": {
        #     "@id": "../meta/marcel.parciak@med.uni-goettingen.de",
        #     "@type": "Person",
        # },
        "abstract": "This is a dummy file, please do not do any science with it!",
        "name": "AddedName",
    }
    # The response itself is already validated inside the helper; only the stored
    # documents are inspected here.
    post_valid_liquid_request(
        cdstar_archive[1], annot_archive={}, annot_file=annot_file
    )

    settings = config.BasicSettings()
    couch_auth = (settings.couch_user, settings.couch_pass)
    meta_json = test_utils.get_json_of_uri(
        f"{settings.couch_uri}/{settings.couch_db}/{cdstar_archive[1]}",
        auth=couch_auth,
    )
    assert "hasPart" in meta_json
    assert isinstance(meta_json["hasPart"], list)

    # Every file document referenced by the archive must carry both annotations.
    for part in meta_json["hasPart"]:
        meta_file_json = test_utils.get_json_of_uri(
            f"{part['@id']}",
            auth=couch_auth,
        )
        assert "abstract" in meta_file_json
        assert (
            meta_file_json["abstract"]
            == "This is a dummy file, please do not do any science with it!"
        )
        assert "name" in meta_file_json
        assert meta_file_json["name"] == "AddedName"
\ No newline at end of file
......@@ -15,18 +15,25 @@
################################################################################
appdirs==1.4.4
attrs==19.3.0
black==19.10b0
click==7.1.2
fastapi==0.60.1
h11==0.9.0
httptools==0.1.1 ; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
pathspec==0.8.0
pydantic==1.6.1
regex==2020.7.14
starlette==0.13.6
toml==0.10.1
black==20.8b1
certifi==2020.12.5
click==7.1.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
distlib==0.3.1
fastapi==0.62.0
filelock==3.0.12
h11==0.11.0
mypy-extensions==0.4.3
pathspec==0.8.1
pbr==5.5.1 ; python_version >= '2.6'
pipenv-to-requirements==0.9.0
pipenv==2020.11.15 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pydantic==1.7.3 ; python_version >= '3.6'
regex==2020.11.13
six==1.15.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
starlette==0.13.6 ; python_version >= '3.6'
toml==0.10.2 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
typed-ast==1.4.1
uvicorn==0.11.6
uvloop==0.14.0 ; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
websockets==8.1
typing-extensions==3.7.4.3
uvicorn==0.12.3
virtualenv-clone==0.5.4 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
virtualenv==20.2.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
......@@ -2,52 +2,43 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
alembic==1.4.2
appdirs==1.4.4
attrs==19.3.0
black==19.10b0
certifi==2020.6.20
################################################################################
# This requirements file has been automatically generated from `Pipfile` with
# `pipenv-to-requirements`
#
#
# This has been done to maintain backward compatibility with tools and services
# that do not support `Pipfile` yet.
#
# Do NOT edit it directly, use `pipenv install [-d]` to modify `Pipfile` and
# `Pipfile.lock` and then regenerate `requirements*.txt`.
################################################################################
alembic==1.4.3
attrs==20.3.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
certifi==2020.12.5
chardet==3.0.4
click==7.1.2
cloudant==2.13.0
distlib==0.3.1
fastapi==0.59.0
filelock==3.0.12
h11==0.9.0
httptools==0.1.1
idna==2.10
iniconfig==1.0.0
iso8601==0.1.12
Jinja2==3.0.0a1
Mako==1.1.3
MarkupSafe==2.0.0a1
more-itertools==8.4.0
packaging==20.4
pathspec==0.8.0
pbr==5.4.5
pipenv==2020.6.2
pipenv-to-requirements==0.9.0
pluggy==0.13.1
py==1.9.0
cloudant==2.14.0
idna==2.10 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
iniconfig==1.1.1
iso8601==0.1.13
jinja2==2.11.2
mako==1.1.3 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
markupsafe==1.1.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
packaging==20.7 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pluggy==0.13.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
py==1.9.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pycdstar3 @ git+https://gitlab.gwdg.de/cdstar/pycdstar3@bde5c7ee604a74e8b20d770716a59310b8f19166#egg=pycdstar3
pydantic==1.6
pyparsing==3.0.0a2
pytest==6.0.0rc1
python-dateutil==2.8.1
pyparsing==2.4.7 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytest==6.1.2
python-dateutil==2.8.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
python-editor==1.0.4
regex==2020.6.8
requests==2.24.0
python-liquid==0.5.6
requests-toolbelt==0.9.1
six==1.15.0
SQLAlchemy==1.3.18
starlette==0.13.4
requests==2.25.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'