Commit 0c9b1886 authored by parciak

Finished working on Receive endpoint including unit tests.


Signed-off-by: Marcel Parciak <marcel.parciak@gmail.com>
parent 9f65075a
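For orientation, a minimal sketch (not part of the commit) of how the new Receive endpoint can be exercised with FastAPI's TestClient; it mirrors the request shape used by the unit tests added below, and the archive id is a placeholder.

from fastapi.testclient import TestClient

from annotator.main import app, endpoint_name

client = TestClient(app)

# Payload shape copied from the new tests; "a1b2c3d4e5f6" is a placeholder archive id.
receive_request = {
    "method": "receive",
    "params": {
        "message": {"payload": {"archive_id": "a1b2c3d4e5f6"}},
        "options": {"annotation": {}},
        "memory": {},
        "credentials": {},
    },
}

response = client.post(f"/{endpoint_name}", json=receive_request)
assert 200 <= response.status_code <= 299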
@@ -15,6 +15,7 @@ sqlalchemy = "*"
alembic = "*"
pytest = "*"
pycdstar3 = {git = "https://gitlab.gwdg.de/cdstar/pycdstar3"}
jinja2 = "*"
[requires]
python_version = "3.8"
......
@@ -16,6 +16,7 @@ class BasicSettings(BaseSettings):
couch_user: str = "medic"
couch_pass: str = "medic2020"
couch_db: str = "medic"
static_directory: str = "annotator/static"
def get_version():
......
import datetime
import json
import os
import time
from typing import Any, Dict
from annotator import access_log, error_log
@@ -119,6 +121,11 @@ def receive(payload: awmodels.RequestReceive, background_tasks: BackgroundTasks)
annotations_file[k] = v
metafile = utils.create_file_for_archive(archive_id)
if not metafile:
raise errors.ActiveWorkflowException(
"Another Error needed here", "Something does not work"
)
background_tasks.add_task(
run_annotation,
archive_id=archive_id,
@@ -156,17 +163,24 @@ def run_annotation(
for meta_file in jsonld_files:
jsonld_archive["hasPart"].append(utils.get_jsonld_reference(meta_file))
meta_file["isPartOf"] = utils.get_jsonld_reference(meta_file)
meta_file["isPartOf"] = utils.get_jsonld_reference(jsonld_archive)
try:
stores.upload_archive(jsonld_archive)
stores.upload_files(jsonld_files)
except:
errmsg = f"Could not upload annotations for archive {archive_id} to CouchDB"
if not stores.upload_archive(jsonld_archive):
errmsg = (
f"Could not upload archive annotations for archive {archive_id} to CouchDB."
)
error_log.error(errmsg)
os.unlink(metafile)
return
if not stores.upload_files(jsonld_files):
errmsg = f"Could not upload file annotations for files of archive {archive_id} to CouchDB."
error_log.error(errmsg)
utils.write_state_to_metafile("incomplete", metafile)
return
utils.write_state_to_metafile("completed", metafile)
@app.post(
f"/{endpoint_name}/check",
......
@@ -74,7 +74,7 @@ class PayloadInput(BaseModel):
This model represents the expected input payload when a message is received via `receive`.
"""
archive_id: str = Field(..., example="abcdef123456")
archive_id: str = Field(..., example="a1b2c3d4e5f6")
annotations_archive: Dict[str, Any] = Field(
{}, example={"id": "something", "name": "Some Thing"}
)
@@ -90,7 +90,7 @@ class PayloadInput(BaseModel):
@staticmethod
def example() -> Dict[str, Any]:
return {
"archive_id": "abcdef123456",
"archive_id": "a1b2c3d4e5f6",
"annotations_archive": {"id": "something", "name": "Some Thing"},
"annotations_file": {"id": "something", "name": "Some Thing"},
}
@@ -101,13 +101,13 @@ class MessageOutput(BaseModel):
This model represents the produced output schema that will be provided in the messages.
"""
archive_id: str = Field(None, example="abcdef123456")
archive_id: str = Field(None, example="a1b2c3d4e5f6")
couch_uri: str = Field(None, example="http://couch.example.org/database/something")
@staticmethod
def example() -> dict:
return {
"archive_id": "abcdef123456",
"archive_id": "a1b2c3d4e5f6",
"couch_uri": "http://couch.example.org/database/something",
}
@@ -142,11 +142,11 @@ class MemoryCommon(BaseModel):
This model represents the expected memory content to communicate state of the agent.
"""
archives: List[str] = Field([], example=["abcdef123456"])
archives: List[str] = Field([], example=["a1b2c3d4e5f6"])
@staticmethod
def example() -> dict:
return {"archives": ["abcdef123456"]}
return {"archives": ["a1b2c3d4e5f6"]}
class CredentialsCommon(BaseModel):
......
import datetime
import json
import os
from enum import Enum
from typing import Any, Dict
from pydantic import BaseModel, Field
from jinja2 import Template
from annotator import config
@@ -20,35 +23,24 @@ class ArchiveInfo(BaseModel):
This model represents [ArchiveInfo](https://cdstar.gwdg.de/docs/dev/#ArchiveInfo) of CDSTAR.
"""
id: str = Field(..., example="abcdef123456")
id: str = Field(..., example="a1b2c3d4e5f6")
vault: str = Field(..., example="medic")
revision: str = Field(None, example="1")
profile: str = Field(None, example="default")
state: ArchiveState = Field(None, example=ArchiveState.open)
created: datetime.date = Field(None, example="2020-07-24T09:40:10.548+0000")
modified: datetime.date = Field(None, example="2020-07-24T09:41:05.881+0000")
created: datetime.datetime = Field(None, example="2020-07-24T09:40:10.548+0000")
modified: datetime.datetime = Field(None, example="2020-07-24T09:41:05.881+0000")
file_count: int = Field(None, example=1)
def to_jsonld(self) -> Dict[str, Any]:
jsonld: Dict[str, Any] = {
"@context": "http://schema.org/",
"@type": "Dataset",
"alternateName": self.id,
"identifier": self.id,
}
if self.file_count:
jsonld["size"] = {
"minValue": 0,
"unitText": "file_count",
"value": self.file_count,
}
if self.revision:
jsonld["version"] = self.revision
if self.created:
jsonld["dateCreated"] = self.created
if self.modified:
jsonld["dateModified"] = self.modified
return jsonld
with open(
os.path.join(
config.BasicSettings().static_directory, "archive.jsonld.jinja"
),
"r",
) as tpl:
template = Template(tpl.read())
return json.loads(template.render(model=self))
class FileInfo(BaseModel):
@@ -56,12 +48,12 @@ class FileInfo(BaseModel):
This model represents [FileInfo](https://cdstar.gwdg.de/docs/dev/#FileInfo) of CDSTAR.
"""
id: str = Field(..., example="abcd1234")
id: str = Field(..., example="a1b2c3d4e5f6g7h8")
name: str = Field(None, example="example.csv")
type: str = Field(None, example="application/csv")
size: int = Field(None, example=250)
created: datetime.date = Field(None, example="2020-07-24T09:40:10.848+0000")
modified: datetime.date = Field(None, example="2020-07-24T09:41:05.882+0000")
created: datetime.datetime = Field(None, example="2020-07-24T09:40:10.848+0000")
modified: datetime.datetime = Field(None, example="2020-07-24T09:41:05.882+0000")
digests: Dict[str, str] = Field(
None,
example={
@@ -72,19 +64,9 @@ class FileInfo(BaseModel):
)
def to_jsonld(self) -> Dict[str, Any]:
jsonld: Dict[str, Any] = {
"@context": "http://schema.org/",
"@type": "DataDownload",
"identifier": self.id,
}
if self.name:
jsonld["name"] = self.name
if self.type:
jsonld["encodingFormat"] = self.type
if self.size:
jsonld["contentSize"] = self.size
if self.created:
jsonld["dateCreated"] = self.created
if self.modified:
jsonld["dateModified"] = self.modified
return jsonld
with open(
os.path.join(config.BasicSettings().static_directory, "file.jsonld.jinja"),
"r",
) as tpl:
template = Template(tpl.read())
return json.loads(template.render(model=self))
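Usage note, illustrative only: rendering an ArchiveInfo through the new template-backed to_jsonld(). The import path and field values are assumptions; BasicSettings().static_directory must point at the directory containing the *.jsonld.jinja templates added in this commit.

import datetime

from annotator.models.cdstar import ArchiveInfo  # import path assumed

info = ArchiveInfo(
    id="a1b2c3d4e5f6",  # example id only
    vault="medic",
    revision="1",
    file_count=4,
    created=datetime.datetime(2020, 7, 24, 9, 40, 10),
)

# Renders archive.jsonld.jinja from the static directory and parses the result into a dict.
jsonld = info.to_jsonld()
assert jsonld["@type"] == "Dataset"
assert jsonld["identifier"] == "a1b2c3d4e5f6"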
from __future__ import annotations
from pydantic import BaseModel, Field
from typing import ForwardRef, List
class MetadataBase(BaseModel):
context: str = Field(
"http://schema.org/",
example="http://schema.org/",
alias="@context",
const="http://schema.org/",
)
type: str = Field("Thing", example="Thing", alias="@type", const="Thing")
id: str = Field(None, example="some_id", alias="@id")
name: str = Field(None, example="Some Name")
url: str = Field(None, example="www.example.org")
def schema_org_ref(self) -> dict:
""" Creates a Schema.org valid JSON-LD reference (i.e. only the type and @id) from the pydantic model. It can be used to refer to linked JSON-LD elements.
Returns
-------
dict
A dictionary according to the JSON-LD reference the pydantic model contains.
"""
return {"@type": self.type, "@id": self.id}
class Config:
allow_population_by_alias = True
class MetadataOrganization(MetadataBase):
type: str = Field(
"Organization", example="Organization", alias="@type", const="Organization"
)
email: str = Field(None, example="medic@umg.eu")
location: str = Field(
None, example="Von-Siebold-Straße 3, 37075 Göttingen, Germany"
)
member: MetadataOrganization = Field(None, example=MetadataBase(id="umg_medic"))
memberOf: MetadataOrganization = Field(None, example=MetadataBase(id="umg"))
telephone: str = Field(None, example="+49 551 39 0")
MetadataOrganization.update_forward_refs()
class MetadataCreativeWork(MetadataBase):
type: str = Field(
"CreativeWork", example="CreativeWork", alias="@type", const="CreativeWork"
)
abstract: str = Field(None, example="A generic dataset.")
accessibilityAPI: str = Field(None, example="https://cdstar.gwdg.de/docs/dev/")
acquireLicensePage: str = Field(None, example="mailto:medic@umg.eu")
author: MetadataOrganization = Field(
None, example=MetadataOrganization(id="umg_medic")
)
creator: MetadataOrganization = Field(
None, example=MetadataOrganization(id="umg_medic")
)
dateCreated: str = Field(None, example="2020-01-01T12:00:05")
datePublished: str = Field(None, example="2020-01-01T12:00:05")
encodingFormat: str = Field(None, example="text/plain")
maintainer: MetadataOrganization = Field(
None, example=MetadataOrganization(id="umg_medic")
)
sourceOrganization: MetadataOrganization = Field(
None, example=MetadataOrganization(id="umg_medic")
)
MetadataArchive = ForwardRef("MetadataArchive")
class MetadataFile(MetadataCreativeWork):
type: str = Field(
"DataDownload", example="DataDownload", alias="@type", const="DataDownload"
)
contentSize: str = Field(None, example="12345")
contentUrl: str = Field(
None, example="http://cdstar.example.com/v3/sources/abcdefg1234/filename.ext"
)
description: str = Field(
None, example="Error happened, this field holds the information for that."
)
encodingFormat: str = Field(None, example="text/plain")
isPartOf: MetadataArchive = Field(None, example=MetadataBase(id="SomeDataset"))
uploadDate: str = Field(None, example="2020-01-01T12:00:05")
MetadataArchive = ForwardRef("MetadataArchive")
class MetadataDataCatalog(MetadataCreativeWork):
type: str = Field(
"DataCatalog", example="DataCatalog", alias="@type", const="DataCatalog"
)
dataset: List[MetadataArchive] = Field(
[], example=[MetadataCreativeWork(id="SomeDataset")]
)
class MetadataArchive(MetadataCreativeWork):
type: str = Field("Dataset", example="Dataset", alias="@type", const="Dataset")
includedInDataCatalog: MetadataDataCatalog = Field(
None, example=MetadataCreativeWork(id="umg_medic")
)
distribution: List[MetadataFile] = Field(
None,
example=[
MetadataCreativeWork(id="somefile.csv"),
MetadataCreativeWork(id="anotherfile.csv"),
],
)
hasPart: List[MetadataFile] = Field(
[],
example=[
MetadataCreativeWork(id="somefile.csv"),
MetadataCreativeWork(id="anotherfile.csv"),
],
)
MetadataDataCatalog.update_forward_refs()
MetadataFile.update_forward_refs()
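For illustration only, a sketch of composing the metadata models above and exporting them with their @-prefixed aliases (pydantic v1 style); the import path is an assumption.

from annotator.models.metadata import MetadataArchive, MetadataFile  # import path assumed

archive = MetadataArchive(id="a1b2c3d4e5f6", name="Some Dataset")
part = MetadataFile(id="somefile.csv", encodingFormat="text/csv")
archive.hasPart = [part]

# Export with aliases so @context/@type/@id appear as in the JSON-LD templates.
print(archive.dict(by_alias=True, exclude_none=True))
# A compact reference, e.g. for linking documents:
print(part.schema_org_ref())  # {'@type': 'DataDownload', '@id': 'somefile.csv'}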
{
"@context": "http://schema.org/",
"@type": "Dataset",
"@id": "http://localhost:5984/medic/{{ model.id }}",
"identifier": "{{ model.id }}",
{% if model.file_count %}
"size": {
"minValue": 0,
"unitText": "file_count",
"value": {{ model.file_count }}
},
{% endif %}
{% if model.revision %}
"version": "{{ model.revision }}",
{% endif %}
{% if model.created %}
"dateCreated": "{{ model.created.isoformat() }}",
{% endif %}
{% if model.modified %}
"dateModified": "{{ model.modified.isoformat() }}",
{% endif %}
"alternateName": "{{ model.id }}",
"hasPart": []
}
\ No newline at end of file
{
"@context": "http://schema.org/",
"@type": "DataDownload",
"@id": "http://localhost:5984/medic/{{ model.id }}",
"identifier": "{{ model.id }}",
{% if model.name %}
"name": "{{ model.name }}",
{% endif %}
{% if model.type %}
"encodingFormat": "{{ model.type }}",
{% endif %}
{% if model.size %}
"contentSize": "{{ model.size }}",
{% endif %}
{% if model.created %}
"dateCreated": "{{ model.created.isoformat() }}",
{% endif %}
{% if model.modified %}
"dateModified": "{{ model.modified.isoformat() }}",
{% endif %}
"alternateName": "{{ model.id }}"
}
\ No newline at end of file
import datetime
import json
import os
import sys
from typing import List, Optional, Tuple
import traceback
from typing import Any, Dict, List, Optional, Tuple
import cloudant
import cloudant.database as CloudantDatabase
import cloudant.document as CloudantDocument
from pycdstar3 import CDStar, CDStarVault
from pycdstar3.api import CDStarArchive
from annotator import config
from annotator import error_log
@@ -29,7 +29,7 @@ def is_valid_archive(archive_id: str) -> bool:
return archive.exists()
except:
t, v, tb = sys.exc_info()
error_log.error(f"{t}: {v}\n{tb}")
error_log.error(f"{t}: {v}\n{traceback.print_tb(tb)}")
return False
@@ -54,10 +54,47 @@ def get_cdstar_metadata(
return meta_archive, meta_files
except:
t, v, tb = sys.exc_info()
error_log.error(f"{t}: {v}\n{tb}")
error_log.error(f"{t}: {v}\n{traceback.print_tb(tb)}")
return None, []
def upload_archive(archive_meta: Dict[str, Any]) -> bool:
return upload_jsonld(archive_meta)
def upload_files(files_metalist: List[Dict[str, Any]]) -> bool:
for file_meta in files_metalist:
if not upload_jsonld(file_meta):
return False
return True
def upload_jsonld(jsonld: Dict[str, Any]) -> bool:
if "identifier" not in jsonld.keys():
error_log.error(f"Supplied jsonld has no id for `upload_jsonld`: {jsonld}")
return False
try:
with cloudant.couchdb(
config.BasicSettings().couch_user,
config.BasicSettings().couch_pass,
url=config.BasicSettings().couch_uri,
use_basic_auth=True,
) as client:
database: CloudantDatabase.CouchDatabase = client[
config.BasicSettings().couch_db
]
if not database.exists():
return False
jsonld["_id"] = jsonld["identifier"]
created_doc: CloudantDocument.Document = database.create_document(jsonld)
return created_doc.exists()
except:
t, v, tb = sys.exc_info()
error_log.error("Could not connect to CouchDB.")
error_log.error(f"{t}: {v}\n{traceback.print_tb(tb)}")
return False
"""
def upload_files(
filelist: List[str], annotation: dict = {}
......
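A small usage sketch (not part of the commit) of the new bool-returning upload helpers, as consumed by run_annotation() above; the dicts stand in for rendered JSON-LD and the ids are placeholders.

from annotator import stores  # import path assumed

archive_meta = {"@type": "Dataset", "identifier": "a1b2c3d4e5f6"}
file_metas = [{"@type": "DataDownload", "identifier": "a1b2c3d4e5f6g7h8"}]

# upload_jsonld() copies "identifier" into CouchDB's "_id"; documents without an
# identifier are rejected and the helpers return False instead of raising.
if not stores.upload_archive(archive_meta):
    print("archive annotation upload failed, see error log")
elif not stores.upload_files(file_metas):
    print("file annotation upload failed, see error log")
else:
    print("all annotations uploaded")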
import datetime
import io
import json
import os
import tempfile
from typing import Any, Dict, Tuple
import cloudant
import cloudant.database as CloudantDatabase
import cloudant.document as CloudantDocument
import pycdstar3
import pytest
import requests
from fastapi.testclient import TestClient
from annotator.main import app, endpoint_name
@@ -10,6 +18,9 @@ from annotator import config
client = TestClient(app)
generated_files_count = 4
file_list_limit = 2
@pytest.fixture(scope="module")
def cdstar_archive():
@@ -17,17 +28,60 @@ def cdstar_archive():
config.BasicSettings().cdstar_uri,
auth=(config.BasicSettings().cdstar_user, config.BasicSettings().cdstar_pass),
)
vault = pycdstar3.CDStarVault(cdstar, config.BasicSettings().cdstar_vault)
archive = vault.new_archive()
for i in range(250):
dummy_file = {"some": "thing", "entity": i}
with io.StringIO() as dummy:
dummy.write(json.dumps(dummy_file, indent=2))
archive.file(f"file_{i}.json").put(source=dummy)
with cdstar.begin(autocommit=True):
vault = pycdstar3.CDStarVault(cdstar, config.BasicSettings().cdstar_vault)
archive = vault.new_archive()
for i in range(generated_files_count):
dummy_file = {"some": "thing", "entity": i}
with io.StringIO() as dummy:
dummy.write(json.dumps(dummy_file, indent=2))
archive.file(f"file_{i}.json").put(source=dummy)
yield archive.id
delete_metadata(archive.id)
archive.delete()
def delete_metadata(archive_id: str):
with cloudant.couchdb(
config.BasicSettings().couch_user,
config.BasicSettings().couch_pass,
url=config.BasicSettings().couch_uri,
use_basic_auth=True,
) as client:
db: CloudantDatabase.CouchDatabase = client[config.BasicSettings().couch_db]
archive: CloudantDocument.Document = db[archive_id]
for part in archive["hasPart"]:
f: CloudantDocument.Document = db[part["@id"].split("/")[-1]]
f.delete()
archive.delete()
def is_schemaorg_jsonld(json: Dict[str, Any]) -> bool:
keys = json.keys()
return (
"@context" in keys
and "@id" in keys
and "@type" in keys
and json["@context"].startswith("http://schema.org")
)
def is_schemaorg_reference(reference: Dict[str, Any]) -> bool:
keys = reference.keys()
return "@type" in keys and "@id" in keys
def get_json_of_uri(uri: str, auth: Tuple[str, str] = None) -> Dict[str, Any]:
# Make the request and assert it was successful
r = requests.get(uri, auth=auth)
assert r.status_code >= 200 and r.status_code <= 299
# Get the JSON response and assert it is a dictionary (no application used in testing responds with lists afaik)
r_json = r.json()
assert r_json
assert isinstance(r_json, dict)
return r_json
def test_invalid_request_01():
response = client.get(f"/{endpoint_name}")
assert response.status_code == 405
@@ -56,6 +110,40 @@ def test_register_01():
def test_receive_01(cdstar_archive):
"""
Tests if a JSON-LD using Schema.org is uploaded to CouchDB.
"""
register_request = {
"method": "receive",
"params": {
"message": {"payload": {"archive_id": cdstar_archive}},
"options": {"annotation": {}},
"memory": {},
"credentials": {},
},
}
response = client.post(f"/{endpoint_name}", json=register_request)
assert response.status_code >= 200 and response.status_code <= 299
assert os.path.exists(
os.path.join(tempfile.gettempdir(), f"meta_{cdstar_archive}.json")
)
# external request: use requests directly instead of the TestClient
meta_json = get_json_of_uri(
f"{config.BasicSettings().couch_uri}/{config.BasicSettings().couch_db}/{cdstar_archive}",
auth=(config.BasicSettings().couch_user, config.BasicSettings().couch_pass),
)
# assert that the response is a JSON-LD of schema.org/Dataset
assert is_schemaorg_jsonld(meta_json)
assert meta_json["@type"] == "Dataset"
assert meta_json["identifier"] == cdstar_archive
def test_receive_02(cdstar_archive):
"""
Tests if the uploaded archive metadata matches the CDSTAR archive.
"""
register_request = {
"method": "receive",
"params": {
@@ -68,6 +156,126 @@ def test_receive_01(cdstar_archive):