"""Communication with the different Textgrid APIs.

The module wraps three remote services:

* :class:`TextgridAuth` -- the SOAP based authorization service
* :class:`TextgridSearch` -- the REST based search/navigation service
* :class:`TextgridCRUD` -- the REST based CRUD service
"""
import io
import logging
import xml.etree.ElementTree as ET
#from typing import List

import requests
from zeep import Client
from zeep.exceptions import TransportError

from .config import TextgridConfig

DEFAULT_API_VERSION = "1.0"
_logger = logging.getLogger(__name__)

# URI fragments of the known API endpoints, keyed by API version and then by
# the internal endpoint name.
_API_PATHS = {
    "1.0": {
        "search": "/tgsearch/search/",
        "info": "/tgsearch/info/",
        "navigation": "/tgsearch/navigation/",
        "agg": "/tgsearch/navigation/agg/",
        "toplevel": "/tgsearch/navigation/toplevel/",
        "facet": "/tgsearch/facet/",
        "relation": "/tgsearch/relation/",
        "crud": "/tgcrud/rest/",
    },
}


def lookup_api_path(internal_name: str, api_version: str = DEFAULT_API_VERSION) -> str:
    """Helper function that returns an URI fragment to the named API endpoint.

    :param internal_name: Internal API name
    :type internal_name: str
    :param api_version: API version, defaults to DEFAULT_API_VERSION
    :type api_version: str, optional
    :raises NotImplementedError: Raises if an unsupported API version is specified
    :raises KeyError: Raises if the internal name is unknown for the API version
    :return: URI fragment
    :rtype: str
    """
    if api_version not in _API_PATHS:
        raise NotImplementedError(
            f"Unsupported Textgrid API version: {api_version!r}")

    return _API_PATHS[api_version][internal_name]


class TextgridAuth:
    """Provide access to the Textgrid Authorization Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    def _connect(self) -> Client:
        """Internal helper that provides a SOAP client that is configured for the use with the Textgrid Auth service.

        :return: A SOAP client
        :rtype: zeep.Client
        """
        client = Client(self._config.auth_wsdl)
        # this is a dirty hack; should be remediated
        client.service._binding_options["address"] = self._config.auth_address
        return client

    # replace ":" with " -> List | None:" when switching to python3.10
    def assigned_projects(self, sid: str):
        """Get assigned projects.

        :param sid: Session ID
        :type sid: str
        :return: A list of project id strings, or None if the service call
            failed with a transport error
        :rtype: List | None
        """
        client = self._connect()
        try:
            return client.service.tgAssignedProjects(sid)
        except TransportError as err:
            # Callers rely on None as the failure signal; leave a trace in the
            # log so that the failure does not go unnoticed.
            _logger.warning("Textgrid Auth service request failed: %s", err)
            return None


class TextgridSearch:
    """Provide access to the Textgrid Search Service."""

    def __init__(self):
        self._config = TextgridConfig()

    def info(self, sid: str, textgrid_uri: str) -> dict:
        """Get info from tg-uri.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A decent choice of metadata
        :rtype: dict
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("info")
        return self._process_response(self._get(uri + textgrid_uri, sid))

    def get_project_contents(self, sid: str, project_id: str) -> dict:
        """Get objects belonging to a project, filtered by objects that are in
        an aggregation in the same project.

        :param sid: Session ID
        :type sid: str
        :param project_id: Project ID
        :type project_id: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        return self._process_response(
            self._get(self._config.nav_address + project_id, sid))

    def get_aggregation_contents(self, sid: str, textgrid_uri: str) -> dict:
        """Get child resources of an aggregation.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        return self._process_response(
            self._get(self._config.nav_address + "agg/" + textgrid_uri, sid))

    @staticmethod
    def _get(url: str, sid: str) -> requests.Response:
        """Send a GET request to the Textgrid Search Service.

        :param url: Request URL without the query string
        :type url: str
        :param sid: Session ID
        :type sid: str
        :return: Response object
        :rtype: requests.Response
        """
        # Let requests build the query string so that the session id is
        # properly URL-encoded.
        response = requests.get(url, params={"sid": sid})
        if not response.ok:
            # Log the bare URL only; the session id must not end up in logs.
            _logger.warning("Textgrid Search request to %s returned HTTP status %s",
                            url, response.status_code)
        return response

    @staticmethod
    def _find_text(element: ET.Element, path: str, namespaces: dict):
        """Return the text of the first element matching `path`.

        :param element: Element to start the search from
        :type element: xml.etree.ElementTree.Element
        :param path: ElementTree path expression
        :type path: str
        :param namespaces: Namespaces dictionary
        :type namespaces: dict
        :return: The text of the matching element, or None if there is no match
        :rtype: str | None
        """
        node = element.find(path, namespaces)
        return None if node is None else node.text

    def _process_response(self, response: requests.Response) -> dict:
        """Process the response of a Textgrid Search Service into a dictionary.

        Metadata fields missing in a result are reported as None.

        :param response: Response of the Textgrid Search Service
        :type response: requests.Response
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        namespaces = self._register_namespaces(response.text)
        xml_stream = io.StringIO(response.text)
        element_tree = ET.parse(xml_stream)
        # findall direct children
        tg_results = element_tree.findall("tgs:result", namespaces=namespaces)
        result_dict = {}
        for result in tg_results:
            extent = self._find_text(
                result, "./object/generic/generated/extent", namespaces)
            result_dict[result.get("textgridUri")] = {
                # title^=name, format^=content_type, extent^=content_length
                "title": self._find_text(
                    result, "./object/generic/provided/title", namespaces),
                "format": self._find_text(
                    result, "./object/generic/provided/format", namespaces),
                "extent": None if extent is None else int(extent),
            }

        return result_dict

    def _register_namespaces(self, xml: str) -> dict:
        """Register namespaces to the global ElementTree and return a
        dictionary of them.

        :param xml: XML Document
        :type xml: str
        :return: Namespaces dictionary
        :rtype: dict
        """
        xml_stream = io.StringIO(xml)
        # With only the "start-ns" event requested, `iterparse` yields
        # (event, (prefix, uri)) pairs. The event name is discarded and the
        # pairs are collected into a dictionary that maps each namespace
        # prefix to its URI.
        namespaces = {prefix: uri
                      for _, (prefix, uri) in ET.iterparse(xml_stream, events=['start-ns'])}

        # Register every prefix (with an underscore appended) in the global
        # ElementTree namespace registry.
        for prefix, uri in namespaces.items():
            ET.register_namespace(prefix + "_", uri)

        return namespaces


class TextgridCRUD:
    """Provide access to the Textgrid CRUD Service."""

    def __init__(self):
        self._config = TextgridConfig()

    def get_data(self, sid: str, textgrid_uri: str) -> bytes:
        """Get resource data.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response content
        :rtype: bytes
        """
        response = self._get_resource(sid, textgrid_uri)
        try:
            return response.content
        finally:
            # The request is streamed; make sure the connection is released
            # even if reading the body fails half way.
            response.close()

    def get_metadata(self, sid: str, textgrid_uri: str) -> dict:
        """Get content type and content length of a resource.

        Currently unused.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with the keys "content-type" and "content-length"
        :rtype: dict
        """
        response = self._get_resource(sid, textgrid_uri)
        try:
            return self._process_response(response)
        finally:
            # Only the headers are read, so the streamed body is never
            # consumed; close explicitly to release the connection.
            response.close()

    def _get_resource(self, sid: str, textgrid_uri: str) -> requests.Response:
        """Helper function to request a resource while deferring the download of the message body.

        The caller is responsible for consuming or closing the response.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response object
        :rtype: requests.Response
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("crud")
        url = uri + textgrid_uri + "/data"
        # defer downloading the response body until accessing Response.content;
        # requests builds the query string so the session id is URL-encoded
        response = requests.get(url, params={"sessionId": sid}, stream=True)
        if not response.ok:
            # Log the bare URL only; the session id must not end up in logs.
            _logger.warning("Textgrid CRUD request to %s returned HTTP status %s",
                            url, response.status_code)
        return response

    def _process_response(self, response: requests.Response) -> dict:
        """Extract content type and content length from the response headers.

        Only called by `get_metadata` => currently unused.

        :param response: Response of the Textgrid CRUD Service
        :type response: requests.Response
        :return: A dictionary with the keys "content-type" and "content-length";
            the length is 0 if the header is missing
        :rtype: dict
        """
        return {
            "content-type": response.headers.get("content-type"),
            "content-length": int(response.headers.get("content-length") or 0),
        }