Commit eddeeed2 authored by msuhr1's avatar msuhr1
Browse files

Refactored Liquid templating

Merge branch 'msuhr1-liquid-templating-refactored' into 'master'

See merge request !7
parents 7aed308b fad8311c
Pipeline #186947 passed with stages
in 1 minute and 34 seconds
......@@ -18,7 +18,7 @@ cloudant = "*"
pytest = "*"
pycdstar3 = {git = "https://gitlab.gwdg.de/cdstar/pycdstar3"}
jinja2 = "*"
python-liquid = "*"
liquidpy = "*"
[requires]
python_version = "3.8"
......
This diff is collapsed.
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
......
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from liquid import Tag, LiquidRenderError
class TagCredential(Tag):
    """Liquid engine extension that resolves ActiveWorkflow credential references.

    If a string contains an ActiveWorkflow credential reference, e.g.
    `{% credential reference %}`, the keyword "reference" is looked up in the
    "credentials" dictionary passed to the renderer. If that dictionary contains
    a matching key, the whole Liquid sequence is replaced with the respective value.
    """

    # This is a void Tag extension, meaning that no closing counterpart is required but the
    # `{% credential reference %}` sequence itself is complete (as opposed to Liquid
    # constructions like `{% for ... %} ... {% endfor %}`).
    VOID = True
    # the start rule
    START = "tag_credential"
    # the grammar based on the base_grammar
    GRAMMAR = "tag_credential: output"

    def parse(self, force=False):
        """Parse the tag content by delegating to the default parser (output rule)."""
        return super().parse(force)

    def _render(self, local_vars, global_vars):
        """Return the credential value referenced by this tag.

        The reference (`self.content`) is looked up in `local_vars["credentials"]`,
        which has to be supplied when rendering is initiated by the Liquid engine.

        Raises
        ------
        LiquidRenderError
            If no "credentials" entry was supplied or it does not contain the
            referenced name.
        """
        reference = self.content
        credentials = local_vars["credentials"] if "credentials" in local_vars else {}
        if reference not in credentials:
            # Raise an exception if something is referenced but missing from the
            # dictionary of replacement values.
            raise LiquidRenderError(msg=f"Credential '{reference}' is not supplied.")
        # Return the value without overwriting self.content: mutating the tag would
        # make a second render look up the secret itself as a credential name (and
        # fail), and would keep the secret stored on the parsed tag object.
        return credentials[reference]
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import datetime
import json
import os
from typing import Any, Dict, Optional, Union
import fastapi
import fastapi.responses
from fastapi import BackgroundTasks, FastAPI, Request
from liquid import Environment
from annotator import access_log, error_log
from annotator import config
......@@ -42,9 +40,13 @@ def aw_endpoint(payload: awmodels.RequestCommon, background_tasks: BackgroundTas
for more details. The call is forwarded to the correct endpoint.
"""
# Replace credentials in options
payload.params.options = utils.replace_credentials(data=payload.params.options,
credentials=payload.params.credentials)
# Replace Liquid Templating markup in provided options. Values from message and credentials may be
# referenced in Liquid statements.
payload.params.options = utils.render_liquid(
options=payload.params.options,
message=payload.params.message,
credentials=payload.params.credentials,
)
if payload.method is not None and payload.method == "register":
return register(payload=awmodels.RequestRegister.parse_obj(payload.dict()))
......@@ -110,16 +112,10 @@ def receive(payload: awmodels.RequestReceive, background_tasks: BackgroundTasks)
response = awmodels.ResponseReceive()
response.result.memory.archives += payload.params.memory.archives
# create a liquid templating environment and render the options as a JSON
env = Environment()
template = env.from_string(
payload.params.options.json(exclude_none=True, exclude_unset=True)
)
rendered_options = json.loads(template.render(payload.params.message.payload))
payload.params.message.payload = awmodels.PayloadInput.parse_obj(
payload.params.message.payload
)
for k, v in rendered_options.items():
for k, v in payload.params.options.dict().items():
if v:
payload.params.message.payload.__setattr__(k, v)
......@@ -145,10 +141,10 @@ def receive(payload: awmodels.RequestReceive, background_tasks: BackgroundTasks)
vault_id = settings["vault_id"]
if not stores.is_valid_archive(
vault_id=vault_id,
archive_id=archive_id,
cdstar_uri=settings["cdstar_uri"],
cdstar_auth=(settings["cdstar_user"], settings["cdstar_pass"]),
vault_id=vault_id,
archive_id=archive_id,
cdstar_uri=settings["cdstar_uri"],
cdstar_auth=(settings["cdstar_user"], settings["cdstar_pass"]),
):
response.result.errors.append(
f"Archive {archive_id} is not available in CDSTAR vault {vault_id}."
......@@ -191,11 +187,11 @@ def receive(payload: awmodels.RequestReceive, background_tasks: BackgroundTasks)
def run_annotation(
archive_id: str,
annotations_archive: Dict[str, Any],
annotations_file: Dict[str, Any],
metafile: str,
settings: Dict[str, str],
archive_id: str,
annotations_archive: Dict[str, Any],
annotations_file: Dict[str, Any],
metafile: str,
settings: Dict[str, str],
) -> None:
archive, filelist = stores.get_cdstar_metadata(
vault_id=settings["vault_id"],
......@@ -313,7 +309,7 @@ def check(payload: awmodels.RequestCheck):
def get_setting_from_payload(
payload: Union[awmodels.RequestReceive, awmodels.RequestCheck], key: str
payload: Union[awmodels.RequestReceive, awmodels.RequestCheck], key: str
) -> Optional[str]:
msg_payload = payload.params.message.payload.dict()
if key in msg_payload.keys() and msg_payload[key]:
......
......@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field, validator
from annotator import config
# TODO: for now, I use dict for annotations
# from annotator.models_metadata import MetadataBase
......
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
......@@ -13,9 +13,9 @@ import pycdstar3
import pytest
from fastapi.testclient import TestClient
from annotator.main import app, endpoint_name
from annotator import config
from annotator import test_utils
from annotator.main import app, endpoint_name
from annotator.test import test_utils
client = TestClient(app)
......@@ -70,7 +70,7 @@ def test_register_compliance_01():
"""
register_request = {"method": "register", "params": {}}
response = client.post(f"/{endpoint_name}", json=register_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_data = response.json()
assert response_data
assert "result" in response_data.keys()
......@@ -99,7 +99,7 @@ def disabled_test_receive_compliance_01():
},
}
response = client.post(f"/{endpoint_name}", json=receive_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_data = response.json()
assert response_data
assert test_utils.is_valid_response(response_data)
......@@ -119,7 +119,7 @@ def disabled_test_receive_compliance_02():
},
}
response = client.post(f"/{endpoint_name}", json=receive_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_data = response.json()
assert response_data
assert test_utils.is_valid_response(response_data)
......@@ -131,10 +131,15 @@ def disabled_test_check_compliance_01():
"""
check_request = {
"method": "check",
"params": {"message": None, "options": {}, "memory": {}, "credentials": [],},
"params": {
"message": None,
"options": {},
"memory": {},
"credentials": [],
},
}
response = client.post(f"/{endpoint_name}", json=check_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_data = response.json()
assert response_data
assert test_utils.is_valid_response(response_data)
......@@ -154,7 +159,7 @@ def disabled_test_check_compliance_02():
},
}
response = client.post(f"/{endpoint_name}", json=check_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_data = response.json()
assert response_data
assert test_utils.is_valid_response(response_data)
......@@ -177,7 +182,7 @@ def test_complete_workflow(cdstar_archive):
},
}
response = client.post(f"/{endpoint_name}", json=receive_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
json = response.json()
assert json
......@@ -199,7 +204,7 @@ def test_complete_workflow(cdstar_archive):
},
}
response = client.post(f"/{endpoint_name}", json=check_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
response_json = response.json()
assert response_json
......
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
......@@ -15,7 +15,7 @@ import pytest
from fastapi.testclient import TestClient
from annotator.main import app, endpoint_name
from annotator import test_utils
from annotator.test import test_utils
from annotator import utils
client = TestClient(app)
......@@ -146,4 +146,4 @@ def test_check_04():
response_data = response.json()
assert response_data
if "archives" in response_data["result"]["memory"].keys():
assert len(response_data["result"]["memory"]["archives"]) == 0
\ No newline at end of file
assert len(response_data["result"]["memory"]["archives"]) == 0
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
......@@ -21,7 +21,7 @@ from fastapi.testclient import TestClient
from annotator.main import app, endpoint_name
from annotator import config
from annotator import test_utils
from annotator.test import test_utils
client = TestClient(app)
......@@ -90,7 +90,7 @@ def test_invalid_request_02():
"""
payload = {"method": "invalid"}
response = client.post(f"/{endpoint_name}", json=payload)
assert response.status_code >= 400 and response.status_code <= 499
assert 400 <= response.status_code <= 499
def post_valid_receive_request(
......@@ -117,7 +117,7 @@ def post_valid_receive_request(
},
}
response = client.post(f"/{endpoint_name}", json=register_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
json = response.json()
assert json
assert test_utils.is_valid_response(json)
......@@ -174,7 +174,7 @@ def post_valid_liquid_request(
}
response = client.post(f"/{endpoint_name}", json=register_request)
assert response.status_code >= 200 and response.status_code <= 299
assert 200 <= response.status_code <= 299
json = response.json()
assert json
assert test_utils.is_valid_response(json)
......@@ -266,15 +266,16 @@ def test_receive_03(cdstar_archive):
vault_id = cdstar_archive[0]
archive_id = cdstar_archive[1]
post_valid_receive_request(archive_id)
settings = config.BasicSettings()
# Check if metadata is available for each file of CDSTAR
for file_offset in range(0, generated_files_count, file_list_limit):
# external request: use requests directly instead of the TestClient
cdstar_json = test_utils.get_json_of_uri(
f"{config.BasicSettings().cdstar_uri}/{vault_id}/{archive_id}?files&limit={file_list_limit}&offset={file_offset}",
f"{settings.cdstar_uri}/{vault_id}/{archive_id}?files&limit={file_list_limit}&offset={file_offset}",
auth=(
config.BasicSettings().cdstar_user,
config.BasicSettings().cdstar_pass,
settings.cdstar_user,
settings.cdstar_pass,
),
)
......@@ -283,10 +284,10 @@ def test_receive_03(cdstar_archive):
for file_info in cdstar_json["files"]:
# external request: use requests directly instead of the TestClient
meta_json = test_utils.get_json_of_uri(
f"{config.BasicSettings().couch_uri}/{config.BasicSettings().couch_db}/{file_info['id']}",
f"{settings.couch_uri}/{settings.couch_db}/{file_info['id']}",
auth=(
config.BasicSettings().couch_user,
config.BasicSettings().couch_pass,
settings.couch_user,
settings.couch_pass,
),
)
assert test_utils.is_schemaorg_jsonld(meta_json)
......@@ -629,4 +630,4 @@ def test_receive_12(cdstar_archive):
)
assert "name" in meta_file_json.keys()
assert meta_file_json["name"] == "AddedName"
\ No newline at end of file
assert meta_file_json["name"] == "AddedName"
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import os
import tempfile
from typing import Any, Dict, Tuple
import pytest
import requests
from liquid import LiquidRenderError
from annotator.errors import ActiveWorkflowError
from annotator.models_activeworkflow import CredentialsCommon
from annotator.utils import replace_credentials
from annotator.utils import render_liquid
def test_replace_credentials():
"""
Tests whether ActiveWorkflow credential syntax is replaced correctly
"""
def test_render_liquid_credentials():
"""Tests whether ActiveWorkflow credential syntax is replaced correctly."""
params = {
'option1': 'no credential reference',
'option2': '{% credential cred_ref_2 %}',
'option3': 'Just a normal {{ liquid.reference }}, should be ignored',
'option4': {'another': 'dict', 'with_ref': '{% credential cred1 %}'},
'option5': 'reference within {% credential cred1 %} normal text',
'option6': 'two references within {% credential cred1 %} normal {% credential cred_ref_2 %} text'
"option1": "no credential reference",
"option2": "{% credential cred_ref_2 %}",
"option3": "Just a normal {{ liquid.reference }}, should be ignored",
"option4": {"another": "dict", "with_ref": "{% credential cred1 %}"},
"option5": "reference within {% credential cred1 %} normal text",
"option6": "two references within {% credential cred1 %} normal {% credential cred_ref_2 %} text",
"option7": [
"lists",
"are",
"also possible with {% credential cred1 %}",
"{% credential cred_ref_2 %}",
],
}
credentials = [CredentialsCommon(name="cred1", value="thisisasecret"),
CredentialsCommon(name="cred_ref_2", value="thisisanothersecret")]
message = {"payload": {"liquid": {"reference": "replaced"}}}
credentials = [
CredentialsCommon(name="cred1", value="thisisasecret"),
CredentialsCommon(name="cred_ref_2", value="thisisanothersecret"),
]
params_expected = {
'option1': 'no credential reference',
'option2': 'thisisanothersecret',
'option3': 'Just a normal {{ liquid.reference }}, should be ignored',
'option4': {'another': 'dict', 'with_ref': 'thisisasecret'},
'option5': 'reference within thisisasecret normal text',
'option6': 'two references within thisisasecret normal thisisanothersecret text'
"option1": "no credential reference",
"option2": "thisisanothersecret",
"option3": "Just a normal replaced, should be ignored",
"option4": {"another": "dict", "with_ref": "thisisasecret"},
"option5": "reference within thisisasecret normal text",
"option6": "two references within thisisasecret normal thisisanothersecret text",
"option7": [
"lists",
"are",
"also possible with thisisasecret",
"thisisanothersecret",
],
}
params_replaced = replace_credentials(params, credentials)
params_replaced = render_liquid(
options=params, message=message, credentials=credentials
)
assert params_replaced == params_expected
# we expect no changes, if the dictionary contains no references
params = {
'option1': 'no credential reference',
'option3': 'Just a normal {{ liquid.reference }}, should be ignored',
'option4': {'another': 'dict'}
"option1": "no credential reference",
"option3": "Just a normal {{ liquid.reference }}, should be ignored",
"option4": {"another": "dict"},
}
params_replaced = replace_credentials(params, credentials)
assert params_replaced == params
params_expected = {
"option1": "no credential reference",
"option3": "Just a normal replaced, should be ignored",
"option4": {"another": "dict"},
}
params_replaced = render_liquid(
options=params, message=message, credentials=credentials
)
assert params_replaced == params_expected
# we expect no changes, if dictionary contains no references and no credentials are supplied
params_replaced = replace_credentials(params, [])
assert params_replaced == params
params_replaced = render_liquid(options=params, message=message, credentials=[])
assert params_replaced == params_expected
# MissingCredentialException is expected if a credential name is referenced that is not available
# LiquidRenderError is expected if a credential name is referenced that is not available
params = {
'option4': {'another': 'dict', 'with_ref': '{% credential cred1 %}'},
'option5': 'reference within {% credential doesntexist2 %} normal text'
"option4": {"another": "dict", "with_ref": "{% credential cred1 %}"},
"option5": "reference within {% credential doesntexist2 %} normal text",
}
with pytest.raises(ActiveWorkflowError):
replace_credentials(params, credentials)
with pytest.raises(LiquidRenderError):
render_liquid(options=params, message={}, credentials=credentials)
# MissingCredentialException is expected, if no credentials are supplied
with pytest.raises(ActiveWorkflowError):
replace_credentials(params, [])
# LiquidRenderError is expected, if no credentials are supplied
with pytest.raises(LiquidRenderError):
render_liquid(options=params, message={}, credentials=[])
def is_valid_response(json: Dict[str, Any]) -> bool:
......@@ -102,10 +124,10 @@ def is_schemaorg_jsonld(json: Dict[str, Any]) -> bool:
"""
keys = json.keys()
return (
"@context" in keys
and "@id" in keys
and "@type" in keys
and json["@context"].startswith("http://schema.org")
"@context" in keys
and "@id" in keys
and "@type" in keys
and json["@context"].startswith("http://schema.org")
)
......
# SPDX-FileCopyrightText: 2020 UMG MeDIC <marcel.parciak@med.uni-goettingen.de>
# SPDX-FileCopyrightText: 2020-2021 UMG MeDIC <medic.tech@med.uni-goettingen.de>
#
# SPDX-License-Identifier: GPL-3.0-or-later
......@@ -6,17 +6,18 @@ import datetime
import enum
import json
import os
import re as regex
import sys
import tempfile
import traceback
from typing import Any, Dict, Optional, Union, List
from annotator import config
from liquid import tag_manager, Liquid
from annotator import error_log
from annotator import models_activeworkflow as awmodels
from annotator.config import BasicSettings
from annotator.errors import ActiveWorkflowError
from annotator.liquid_tags import TagCredential
from annotator.models_activeworkflow import CredentialsCommon
class AnnotationState(str, enum.Enum):
......@@ -149,7 +150,7 @@ def remove_metafile(path: str) -> bool:
def is_authorized(
params: Union[awmodels.ParamsRegister, awmodels.ParamsReceive, awmodels.ParamsCheck]
params: Union[awmodels.ParamsRegister, awmodels.ParamsReceive, awmodels.ParamsCheck]
) -> bool:
"""
Check if a params section is authorized to use this Annotation agent instance.
......@@ -163,7 +164,7 @@ def is_authorized(
params: Union[awmodels.ParamsRegister, awmodels.ParamsReceive, awmodels.ParamsCheck]
Any params dictionary posted by the requester JSON
Results
Returns
-------
bool
True if parameters hold valid credentials, authorizing any request. False otherwise
......@@ -180,51 +181,51 @@ def is_authorized(
return False
def replace_credentials(data: Dict, credentials: List[awmodels.CredentialsCommon]):
"""
Replace occurrences of AW credential syntax in the items of a Dict
def render_liquid(
options: Dict, message: Dict, credentials: List[CredentialsCommon]
) -> Dict:
"""Apply Liquid templating rule to a dictionary using context from an
ActiveWorkflow message and Credential list as potential input
Searches for ActiveWorkflow credential references in Dict items' values, i.e. `{% credential ref_id %}` and
replaces the entire string with the value of `ref_id` if that is present as a key in the credentials Dict.
For more information on Liquid, see https://shopify.github.io/liquid/
Parameters
----------
data: Dict
Any dictionary that possibly contains ActiveWorkflow credential references
credentials: List[awmodels.CredentialsCommon]
A List of CredentialsCommon objects
options : Dict
Expects a dictionary of string values containing Liquid templating markup,
e.g. the options passed via ActiveWorkflow requests
message : Dict
Expects a dictionary of string values that will be used as replacements if
referenced in Liquid options parameter
credentials : List[CredentialsCommon]
Additional list of ActiveWorkflow Credentials that may be referenced in Liquid
markup in the options parameter
Results
Returns
-------
data: Dict
The input Dict with replaced values (if applicable)
Dict
The dictionary passed as options parameter with all Liquid markup applied, i.e.
references to contents of message or credentials replaced with the actual values.
"""
for key, value in data.items():
# Enter recursion if value itself is a dictionary
if type(value) is dict:
data[key] = replace_credentials(value, credentials)
# determine whether value contains an AW credential reference
# and look up the reference in the given list of credentials
if type(value) is str:
# Regular expression tests whether a reference to ActiveWorkflow credential is present,
# designated by double-curly-brackets: {% credential credential_name %}
regex_str = "({% credential )(.[^%}]*)( %})"
while (True):
matches = regex.search(regex_str, value)
if matches:
# [0] is the whole string if the regex is matched, [1] is the first group, [2] the second group...
# We need the 2nd group "(.*)" from the regex, which would be the name of the credential
reference = matches[2].strip()
replaced = False
for c in credentials:
if c.name == reference:
value = value.replace(matches[0], c.value)
data[key] = value
replaced = True