Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
Textgrid Repository WebDAV Server
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DARIAH-DE
TextGridRep
Textgrid Repository WebDAV Server
Commits
61ce9f7a
Verified
Commit
61ce9f7a
authored
3 years ago
by
Stefan Hynek
Browse files
Options
Downloads
Patches
Plain Diff
feat(tgapi): provide access to the Textgrid Search and CRUD services
parent
564bad89
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/repdav/tgapi.py
+188
-6
188 additions, 6 deletions
src/repdav/tgapi.py
with
188 additions
and
6 deletions
src/repdav/tgapi.py
+
188
−
6
View file @
61ce9f7a
"""
Communication with the different Textgrid APIs.
"""
import
io
import
logging
import
requests
import
xml.etree.ElementTree
as
ET
#from typing import List
from
zeep
import
Client
from
zeep.exceptions
import
TransportError
from
.config
import
TextgridConfig
# API version that is used when a caller does not request a specific one.
DEFAULT_API_VERSION = "1.0"

_logger = logging.getLogger(__name__)

# URI fragments of the Textgrid endpoints, keyed first by API version and then
# by the internal endpoint name. Supporting another API version only requires
# a new top-level entry here.
_API_PATHS = {
    "1.0": {
        "search": "/tgsearch/search/",
        "info": "/tgsearch/info/",
        "navigation": "/tgsearch/navigation/",
        "agg": "/tgsearch/navigation/agg/",
        "toplevel": "/tgsearch/navigation/toplevel/",
        "facet": "/tgsearch/facet/",
        "relation": "/tgsearch/relation/",
        "crud": "/tgcrud/rest/",
    },
}


def lookup_api_path(internal_name: str, api_version: str = DEFAULT_API_VERSION) -> str:
    """
    Helper function that returns an uri fragment to the named API endpoints.

    :param internal_name: Internal API name
    :type internal_name: str
    :param api_version: API version, defaults to DEFAULT_API_VERSION
    :type api_version: str, optional
    :raises NotImplementedError: Raises if an unsupported API version is specified
    :raises KeyError: Raises if the internal API name is unknown for the version
    :return: Uri fragment
    :rtype: str
    """
    try:
        mapping = _API_PATHS[api_version]
    except KeyError:
        # Every version without a table entry is unsupported; name it in the
        # message so the failure is diagnosable from the traceback alone.
        raise NotImplementedError(
            f"Unsupported Textgrid API version: {api_version!r}"
        ) from None
    # An unknown endpoint name deliberately propagates as KeyError.
    return mapping[internal_name]
class TextgridAuth:
    """
    Provide access to the Textgrid Authorization Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    def _connect(self) -> Client:
        """
        Internal helper that provides a SOAP client that is configured for the use with the Textgrid Auth service.

        :return: A SOAP client
        :rtype: zeep.Client
        """
        client = Client(self._config.auth_wsdl)
        # this is a dirty hack; should be remediated
        # It overwrites the service address of the default binding with the
        # configured one by reaching into a private attribute of the client.
        client.service._binding_options["address"] = self._config.auth_address
        return client

    # replace ":" with " -> List | None:" when switching to python3.10
    def assigned_projects(self, sid: str):
        """
        Get assigned projects.

        :param sid: Session ID
        :type sid: str
        :return: A list of project id strings, or None if the request to the
            Auth service failed with a transport error
        :rtype: List | None
        """
        client = self._connect()
        try:
            return client.service.tgAssignedProjects(sid)
        except TransportError:
            # Callers still get None, but the failure is no longer invisible.
            # The session id is intentionally kept out of the log message.
            _logger.warning(
                "Request for assigned projects to the Textgrid Auth service failed",
                exc_info=True,
            )
            return None
class TextgridSearch:
    """
    Provide access to the Textgrid Search Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    def info(self, sid: str, textgrid_uri: str) -> dict:
        """
        Get info from tg-uri.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A decent choice of metadata
        :rtype: dict
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("info")
        # Passing the session id via `params` lets requests URL-encode it.
        r = requests.get(uri + textgrid_uri, params={"sid": sid})
        # TODO implement error handling on http status not in 200..299
        return self._process_response(r)

    def get_project_contents(self, sid: str, project_id: str) -> dict:
        """
        Get objects belonging to a project, filtered by objects that are in
        an aggregation in the same project.

        :param sid: Session ID
        :type sid: str
        :param project_id: Project ID
        :type project_id: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        r = requests.get(self._config.nav_address + project_id, params={"sid": sid})
        return self._process_response(r)

    def get_aggregation_contents(self, sid: str, textgrid_uri: str) -> dict:
        """
        Get child resources of an aggregation.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        r = requests.get(
            self._config.nav_address + "agg/" + textgrid_uri,
            params={"sid": sid},
        )
        return self._process_response(r)

    def _process_response(self, response: requests.Response) -> dict:
        """
        Process the response of a Textgrid Search Service into a dictionary.

        Metadata elements that are missing from a result do not abort the
        processing: "title" and "format" are then None and "extent" is 0.

        :param response: Response of the Textgrid Search Service
        :type response: requests.Response
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        # Read the decoded body once; it is needed for both parsing passes.
        xml = response.text
        namespaces = self._register_namespaces(xml)
        element_tree = ET.parse(io.StringIO(xml))
        # findall direct children
        tg_results = element_tree.findall("tgs:result", namespaces=namespaces)
        result_dict = {}
        for result in tg_results:
            extent = self._find_text(
                result, "./object/generic/generated/extent", namespaces
            )
            result_dict[result.get("textgridUri")] = {
                # title^=name, format^=content_type, extent^=content_length
                "title": self._find_text(
                    result, "./object/generic/provided/title", namespaces
                ),
                "format": self._find_text(
                    result, "./object/generic/provided/format", namespaces
                ),
                "extent": int(extent) if extent else 0,
            }
        return result_dict

    @staticmethod
    def _find_text(element, path: str, namespaces: dict):
        """
        Return the text of the first element matching `path`, or None if
        there is no such element.

        :param element: Element to search in
        :type element: xml.etree.ElementTree.Element
        :param path: ElementTree path expression
        :type path: str
        :param namespaces: Namespaces dictionary used to resolve the path
        :type namespaces: dict
        :return: Text content of the matching element, if any
        :rtype: str | None
        """
        match = element.find(path, namespaces)
        return None if match is None else match.text

    def _register_namespaces(self, xml: str) -> dict:
        """
        Register namespaces to the global ElementTree and return a
        dictionary of them.

        :param xml: XML Document
        :type xml: str
        :return: Namespaces dictionary
        :rtype: dict
        """
        xml_stream = io.StringIO(xml)
        # Uses a dict comprehension and element tree's iterparse function to
        # create a dictionary mapping each namespace prefix to its URI. With
        # only the "start-ns" event requested, `iterparse` yields
        # ("start-ns", (prefix, uri)) pairs; the underscore discards the
        # event name.
        namespaces = {
            node[0]: node[1]
            for _, node in ET.iterparse(xml_stream, events=['start-ns'])
        }
        # Iterates through the newly created namespace dictionary registering
        # each prefix (with an underscore appended) in the global registry.
        for prefix, uri in namespaces.items():
            ET.register_namespace(prefix + "_", uri)
        return namespaces
class TextgridCRUD:
    """
    Provide access to the Textgrid CRUD Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    def get_data(self, sid: str, textgrid_uri: str) -> bytes:
        """
        Get resource data.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response content
        :rtype: bytes
        """
        response = self._get_resource(sid, textgrid_uri)
        try:
            # Accessing `content` downloads the deferred message body.
            return response.content
        finally:
            # Release the connection even if the download fails midway.
            response.close()

    def get_metadata(self, sid: str, textgrid_uri: str) -> dict:
        """
        Get the content type and content length of a resource from the
        response headers, without downloading the message body.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with the keys "content-type" and "content-length"
        :rtype: dict
        """
        # currently unused
        response = self._get_resource(sid, textgrid_uri)
        try:
            return self._process_response(response)
        finally:
            # The body of the streamed response is never consumed here, so
            # the connection has to be released explicitly.
            response.close()

    def _get_resource(self, sid: str, textgrid_uri: str) -> requests.Response:
        """
        Helper function to get the response headers of a requested resource and defer the download of the message body.

        The caller is responsible for consuming or closing the response.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response object
        :rtype: requests.Response
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("crud")
        # defer downloading the response body until accessing Response.content
        # Passing the session id via `params` lets requests URL-encode it.
        r = requests.get(
            uri + textgrid_uri + "/data",
            params={"sessionId": sid},
            stream=True,
        )
        # TODO implement error handling on http status not in 200..299
        return r

    def _process_response(self, response: requests.Response) -> dict:
        """
        Extract content type and content length from the response headers.

        :param response: Response of the Textgrid CRUD Service
        :type response: requests.Response
        :return: A dictionary with the keys "content-type" and "content-length";
            the length is 0 if the header is missing or empty
        :rtype: dict
        """
        # only called by `get_metadata` => currently unused
        result_dict = {}
        result_dict["content-type"] = response.headers.get("content-type")
        result_dict["content-length"] = int(
            response.headers.get("content-length") or 0
        )
        return result_dict
This diff is collapsed.
Click to expand it.
Stefan Hynek
@hynek
mentioned in commit
eab3502a
·
3 years ago
mentioned in commit
eab3502a
mentioned in commit eab3502a1317d3865691e0d79c2f72e0ccd8e341
Toggle commit list
Stefan Hynek
@hynek
mentioned in commit
61644a05
·
3 years ago
mentioned in commit
61644a05
mentioned in commit 61644a0511ab3ae6a939d6d3d42107c87ed6072c
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment