diff --git a/src/repdav/tgapi.py b/src/repdav/tgapi.py deleted file mode 100644 index c3ef2583d2e49677b04f7c292e2fbd965d38e530..0000000000000000000000000000000000000000 --- a/src/repdav/tgapi.py +++ /dev/null @@ -1,220 +0,0 @@ -"""Communicate with the different Textgrid APIs.""" -import io -import xml.etree.ElementTree as ET - -import requests -#from typing import List -from zeep import Client -from zeep.exceptions import TransportError - -from .config import TextgridConfig - -__docformat__ = 'restructuredtext en' -DEFAULT_API_VERSION = "1.0" - - -def lookup_api_path(internal_name: str, api_version: str = DEFAULT_API_VERSION) -> str: - """Helper function that returns an uri fragment to the named API endpoints. - - :param internal_name: Internal API name - :type internal_name: str - :param api_version: API version, defaults to DEFAULT_API_VERSION - :type api_version: str, optional - :raises NotImplementedError: Raises if an unsupported API version is specified - :return: Uri fragment - :rtype: str - """ - if api_version == "1.0": - mapping = { - "search": "/tgsearch/search/", - "info": "/tgsearch/info/", - "navigation": "/tgsearch/navigation/", - "agg": "/tgsearch/navigation/agg/", - "toplevel": "/tgsearch/navigation/toplevel/", - "facet": "/tgsearch/facet/", - "relation": "/tgsearch/relation/", - "crud": "/tgcrud/rest/", - } - elif api_version == "2.0": - raise NotImplementedError - else: - raise NotImplementedError - - return mapping[internal_name] - - -class TextgridAuth: - """Provide access to the Textgrid Authorization Service. - """ - - def __init__(self): - self._config = TextgridConfig() - - def _connect(self) -> Client: - """Internal helper that provides a SOAP client that is configured for - the use with the Textgrid Auth service. - - :return: A SOAP client - :rtype: zeep.Client - """ - client = Client(self._config.auth_wsdl) - # this is a dirty hack; should be remediated - client.service._binding_options["address"] = self._config.auth_address - return client - - # replace ":" with " -> List | None:" when switching to python3.10 - def assigned_projects(self, sid: str): - """Get assigned projects. - - :return: A list of project id strings - :rtype: List | None - """ - client = self._connect() - try: - return client.service.tgAssignedProjects(sid) - except TransportError: - return None - - -class TextgridSearch: - """Provide access to the Textgrid Search Service.""" - - def __init__(self): - self._config = TextgridConfig() - - def info(self, sid: str, textgrid_uri: str) -> dict: - """Get info from tg-uri. - - :return: A decent choice of metadata - :rtype: dict - """ - uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("info") - response = requests.get(uri + textgrid_uri + "?sid=" + sid) - # TODO implement error handling on http status not in 200..299 - return self._process_response(response) - - def get_project_contents(self, sid: str, project_id: str) -> dict: - """Get objects belonging to a project, filtered by objects that are in - an aggregation in the same project. - - :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries - :rtype: dict - """ - response = requests.get( - self._config.nav_address + project_id + "?sid=" + sid) - return self._process_response(response) - - def get_aggregation_contents(self, sid: str, textgrid_uri: str) -> dict: - """Get child resources of an aggregation. - - :param sid: Session ID - :type sid: str - :param textgrid_uri: Textgrid URI - :type textgrid_uri: str - :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries - :rtype: dict - """ - response = requests.get(self._config.nav_address + "agg/" + - textgrid_uri + "?sid=" + sid) - return self._process_response(response) - - def _process_response(self, response: requests.Response) -> dict: - """Process the response of a Textgrid Search Service into a dictionary. - - :param response: Response of the Textgrid Search Service - :type response: requests.Response - :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries - :rtype: dict - """ - namespaces = self._register_namespaces(response.text) - xml_stream = io.StringIO(response.text) - element_tree = ET.parse(xml_stream) - # findall direct children - tg_results = element_tree.findall("tgs:result", namespaces=namespaces) - result_dict = {} - for result in tg_results: - result_dict[result.find("./object/generic/generated/textgridUri", namespaces).text] = { - # title^=name, format^=content_type, extent^=content_length - "title": result.find("./object/generic/provided/title", namespaces).text, - "format": result.find("./object/generic/provided/format", namespaces).text, - "extent": int(result.find("./object/generic/generated/extent", namespaces).text), - } - - return result_dict - - @staticmethod - def _register_namespaces(xml: str) -> dict: - """Register namespaces to the global ElementTree and return a - dictionary of them. - - :param xml: XML Document - :type xml: str - :return: Namespaces dictionary - :rtype: dict - """ - xml_stream = io.StringIO(xml) - # Uses a list comprehension and element tree's iterparse function to - # create a dictionary containing the namespace prefix and it's uri. The - # underscore is utilized to remove the "start-ns" output. `iterparse` - # returns an iterator providing (event, elem) pairs. - # see https://medium.datadriveninvestor.com/getting-started-using-pythons-elementtree-to-navigate-xml-files-dc9bc720eaa6 - namespaces = {node[0]: node[1] - for _, node in ET.iterparse(xml_stream, events=['start-ns'])} - - # Iterates through the newly created namespace dictionary registering the prefixes - for prefix, uri in namespaces.items(): - ET.register_namespace(prefix + "_", uri) - - return namespaces - - -class TextgridCRUD: - """Provide access to the Textgrid CRUD Service.""" - - def __init__(self): - self._config = TextgridConfig() - - def get_data(self, sid: str, textgrid_uri: str) -> bytes: - """Get resource data. - - :param sid: Session ID - :type sid: str - :param textgrid_uri: Textgrid URI - :type textgrid_uri: str - :return: Response content - :rtype: bytes - """ - response = self._get_resource(sid, textgrid_uri) - return response.content - - def get_metadata(self, sid: str, textgrid_uri: str): - # currently unused - response = self._get_resource(sid, textgrid_uri) - return self._process_response(response) - - def _get_resource(self, sid: str, textgrid_uri: str) -> requests.Response: - """Helper function to get the response headers of a requested resource - and defer the download of the message body. - - :param sid: Session ID - :type sid: str - :param textgrid_uri: Textgrid URI - :type textgrid_uri: str - :return: Response object - :rtype: requests.Response - """ - uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("crud") - # defer downloading the response body until accessing Response.content - response = requests.get(uri + textgrid_uri + - "/data?sessionId=" + sid, stream=True) - # TODO implement error handling on http status not in 200..299 - return response - - @staticmethod - def _process_response(response: requests.Response) -> dict: - # only called by `get_metadata` => currently unused - result_dict = {} - result_dict["content-type"] = response.headers.get("content-type") - result_dict["content-length"] = int( - response.headers.get("content-length") or 0) - return result_dict