Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
Textgrid Repository WebDAV Server
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
DARIAH-DE
TextGridRep
Textgrid Repository WebDAV Server
Commits
5d371542
Commit
5d371542
authored
3 years ago
by
Stefan Hynek
Browse files
Options
Downloads
Plain Diff
Merge branch '20-use-tgsearch-from-tgclients-lib' into 'main'
Resolve "use tgsearch from tgclients lib" Closes
#20
See merge request
!11
parents
61644a05
86b7989a
No related branches found
No related tags found
1 merge request
!11
Resolve "use tgsearch from tgclients lib"
Pipeline
#277334
passed
3 years ago
Stage: release
Stage: build
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/repdav/textgrid_dav_provider.py
+32
-14
32 additions, 14 deletions
src/repdav/textgrid_dav_provider.py
src/repdav/tgapi.py
+0
-220
0 additions, 220 deletions
src/repdav/tgapi.py
with
32 additions
and
234 deletions
src/repdav/textgrid_dav_provider.py
+
32
−
14
View file @
5d371542
...
...
@@ -8,13 +8,12 @@ from pprint import pformat
from
tgclients.auth
import
TextgridAuth
from
tgclients.config
import
TextgridConfig
from
tgclients.crud
import
TextgridCRUD
from
tgclients.search
import
TextgridSearch
from
wsgidav.dav_provider
import
DAVCollection
,
DAVNonCollection
,
DAVProvider
from
wsgidav.util
import
join_uri
,
pop_path
from
repdav.stream_tools
import
FileLikeQueue
from
.tgapi
import
TextgridSearch
_logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -80,6 +79,8 @@ class TextgridProject(DAVCollection):
_logger
.
debug
(
"
Called TextgridProject.__init__(self, %s, environ).
"
,
path
)
DAVCollection
.
__init__
(
self
,
path
,
environ
)
self
.
_sid
=
environ
[
"
wsgidav.auth.user_name
"
]
self
.
_tgconfig
=
TextgridConfig
()
self
.
_tgsearch
=
TextgridSearch
(
self
.
_tgconfig
.
search
)
def
create_empty_resource
(
self
,
name
):
pass
...
...
@@ -99,14 +100,22 @@ class TextgridProject(DAVCollection):
# return names
#
# path resolution has to be rewritten before we can work with resource titles
resources
=
TextgridSearch
().
get_project_contents
(
self
.
_sid
,
self
.
path
.
split
(
"
/
"
)[
-
1
]
)
return
resources
.
keys
()
response
=
self
.
_tgsearch
.
list_project_root
(
self
.
path
.
split
(
"
/
"
)[
-
1
],
self
.
_sid
)
names
=
[]
for
result
in
response
.
result
:
names
.
append
(
result
.
object_value
.
generic
.
generated
.
textgrid_uri
.
value
)
return
names
def
get_member
(
self
,
name
):
_logger
.
debug
(
"
Called TextgridProject.get_member(self, %s).
"
,
name
)
info
=
TextgridSearch
().
info
(
self
.
_sid
,
name
)
response
=
self
.
_tgsearch
.
info
(
name
,
self
.
_sid
)
info
=
{
name
:
{
"
title
"
:
response
.
result
[
0
].
object_value
.
generic
.
provided
.
title
,
"
format
"
:
response
.
result
[
0
].
object_value
.
generic
.
provided
.
format
,
"
extent
"
:
response
.
result
[
0
].
object_value
.
generic
.
generated
.
extent
,
}
}
_logger
.
info
(
"
INFO: %s
"
,
info
)
if
"
aggregation
"
in
info
[
name
][
"
format
"
]:
return
TextgridAggregation
(
join_uri
(
self
.
path
,
name
),
self
.
environ
,
info
)
...
...
@@ -161,6 +170,8 @@ class TextgridAggregation(DAVCollection):
DAVCollection
.
__init__
(
self
,
path
,
environ
)
self
.
_sid
=
environ
[
"
wsgidav.auth.user_name
"
]
self
.
_info
=
info
self
.
_tgconfig
=
TextgridConfig
()
self
.
_tgsearch
=
TextgridSearch
(
self
.
_tgconfig
.
search
)
def
create_empty_resource
(
self
,
name
):
pass
...
...
@@ -173,15 +184,22 @@ class TextgridAggregation(DAVCollection):
def
get_member_names
(
self
):
_logger
.
debug
(
"
Called TextgridAggregation.get_member_names(self).
"
)
res
ources
=
TextgridSearch
().
get_aggregation_contents
(
self
.
_sid
,
self
.
path
.
split
(
"
/
"
)[
-
1
]
)
# _logger.debug("RESOURCES: %s", resources
)
return
resources
.
keys
()
res
ponse
=
self
.
_tgsearch
.
list_aggregation
(
self
.
path
.
split
(
"
/
"
)[
-
1
],
self
.
_sid
)
names
=
[
]
for
result
in
response
.
result
:
names
.
append
(
result
.
object_value
.
generic
.
generated
.
textgrid_uri
.
value
)
return
names
def
get_member
(
self
,
name
):
_logger
.
debug
(
"
Called TextgridAggregation.get_member(self, %s).
"
,
name
)
info
=
TextgridSearch
().
info
(
self
.
_sid
,
name
)
response
=
self
.
_tgsearch
.
info
(
name
,
self
.
_sid
)
info
=
{
name
:
{
"
title
"
:
response
.
result
[
0
].
object_value
.
generic
.
provided
.
title
,
"
format
"
:
response
.
result
[
0
].
object_value
.
generic
.
provided
.
format
,
"
extent
"
:
response
.
result
[
0
].
object_value
.
generic
.
generated
.
extent
,
}
}
_logger
.
info
(
"
INFO: %s
"
,
info
)
if
"
aggregation
"
in
info
[
name
][
"
format
"
]:
return
TextgridAggregation
(
join_uri
(
self
.
path
,
name
),
self
.
environ
,
info
)
...
...
@@ -260,7 +278,7 @@ class TextgridResource(DAVNonCollection):
)
queue
=
FileLikeQueue
(
int
(
self
.
_size
))
config
=
TextgridConfig
(
"
http://textgridlab.org/
"
)
config
=
TextgridConfig
()
crud
=
TextgridCRUD
(
config
.
crud
)
metadata
=
crud
.
read_metadata
(
self
.
path
.
split
(
"
/
"
)[
-
1
],
self
.
_sid
).
content
...
...
This diff is collapsed.
Click to expand it.
src/repdav/tgapi.py
deleted
100644 → 0
+
0
−
220
View file @
61644a05
"""
Communicate with the different Textgrid APIs.
"""
import
io
import
xml.etree.ElementTree
as
ET
import
requests
#from typing import List
from
zeep
import
Client
from
zeep.exceptions
import
TransportError
from
.config
import
TextgridConfig
__docformat__
=
'
restructuredtext en
'
# API version that is addressed when a caller does not request a specific one.
DEFAULT_API_VERSION = "1.0"


def lookup_api_path(internal_name: str, api_version: str = DEFAULT_API_VERSION) -> str:
    """
    Helper function that returns an uri fragment to the named API endpoints.

    :param internal_name: Internal API name
    :type internal_name: str
    :param api_version: API version, defaults to DEFAULT_API_VERSION
    :type api_version: str, optional
    :raises NotImplementedError: Raises if an unsupported API version is specified
    :raises KeyError: Raises if `internal_name` is not a known API name
    :return: Uri fragment
    :rtype: str
    """
    # "1.0" is the only version with a path mapping; every other version
    # (including "2.0") is rejected up front with a message naming it.
    if api_version != "1.0":
        raise NotImplementedError(
            f"Textgrid API version {api_version!r} is not supported"
        )

    mapping = {
        "search": "/tgsearch/search/",
        "info": "/tgsearch/info/",
        "navigation": "/tgsearch/navigation/",
        "agg": "/tgsearch/navigation/agg/",
        "toplevel": "/tgsearch/navigation/toplevel/",
        "facet": "/tgsearch/facet/",
        "relation": "/tgsearch/relation/",
        "crud": "/tgcrud/rest/",
    }
    return mapping[internal_name]
class TextgridAuth:
    """
    Client for the Textgrid Authorization Service.
    """

    def __init__(self):
        # Configuration supplies the WSDL location and the service address.
        self._config = TextgridConfig()

    def _connect(self) -> Client:
        """
        Internal helper that builds a SOAP client from the configured WSDL
        and points it at the configured Textgrid Auth address.

        :return: A SOAP client
        :rtype: zeep.Client
        """
        soap_client = Client(self._config.auth_wsdl)
        # this is a dirty hack; should be remediated
        # (it writes the address into a private attribute of the service proxy)
        soap_client.service._binding_options["address"] = self._config.auth_address
        return soap_client

    # replace ":" with " -> List | None:" when switching to python3.10
    def assigned_projects(self, sid: str):
        """
        Get assigned projects.

        :param sid: Session ID
        :type sid: str
        :return: A list of project id strings, or None if the service call
            raised a TransportError
        :rtype: List | None
        """
        soap_client = self._connect()
        try:
            projects = soap_client.service.tgAssignedProjects(sid)
        except TransportError:
            return None
        return projects
class TextgridSearch:
    """
    Provide access to the Textgrid Search Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    @staticmethod
    def _get(url: str, sid: str) -> "requests.Response":
        """
        Internal helper that issues a GET request carrying the session ID.

        The session ID is handed to `requests` as a query parameter instead
        of being concatenated into the URL, so characters such as "&", "#"
        or spaces are percent-encoded and cannot alter the query string.

        :param url: Full URL of the endpoint, without query string
        :type url: str
        :param sid: Session ID
        :type sid: str
        :return: Response object
        :rtype: requests.Response
        """
        # TODO implement error handling on http status not in 200..299
        return requests.get(url, params={"sid": sid})

    def info(self, sid: str, textgrid_uri: str) -> dict:
        """
        Get info from tg-uri.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A decent choice of metadata
        :rtype: dict
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("info")
        return self._process_response(self._get(uri + textgrid_uri, sid))

    def get_project_contents(self, sid: str, project_id: str) -> dict:
        """
        Get objects belonging to a project, filtered by objects that are in
        an aggregation in the same project.

        :param sid: Session ID
        :type sid: str
        :param project_id: Project ID
        :type project_id: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        response = self._get(self._config.nav_address + project_id, sid)
        return self._process_response(response)

    def get_aggregation_contents(self, sid: str, textgrid_uri: str) -> dict:
        """
        Get child resources of an aggregation.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        response = self._get(self._config.nav_address + "agg/" + textgrid_uri, sid)
        return self._process_response(response)

    def _process_response(self, response: "requests.Response") -> dict:
        """
        Process the response of a Textgrid Search Service into a dictionary.

        :param response: Response of the Textgrid Search Service
        :type response: requests.Response
        :return: A dictionary with keys of Textgrid URIs and values of metadata dictionaries
        :rtype: dict
        """
        # Read the body once; it is needed for the namespace scan and the parse.
        body = response.text
        namespaces = self._register_namespaces(body)
        root = ET.fromstring(body)

        def text_of(element, path):
            # Text of the single descendant addressed by `path`.
            return element.find(path, namespaces).text

        result_dict = {}
        # findall direct children
        for result in root.findall("tgs:result", namespaces=namespaces):
            textgrid_uri = text_of(result, "./object/generic/generated/textgridUri")
            result_dict[textgrid_uri] = {
                # title^=name, format^=content_type, extent^=content_length
                "title": text_of(result, "./object/generic/provided/title"),
                "format": text_of(result, "./object/generic/provided/format"),
                "extent": int(text_of(result, "./object/generic/generated/extent")),
            }
        return result_dict

    @staticmethod
    def _register_namespaces(xml: str) -> dict:
        """
        Register namespaces to the global ElementTree and return a
        dictionary of them.

        :param xml: XML Document
        :type xml: str
        :return: Namespaces dictionary
        :rtype: dict
        """
        # With only "start-ns" events requested, `iterparse` yields
        # ("start-ns", (prefix, uri)) pairs; the event name is discarded.
        # see https://medium.datadriveninvestor.com/getting-started-using-pythons-elementtree-to-navigate-xml-files-dc9bc720eaa6
        namespaces = {
            prefix: uri
            for _, (prefix, uri) in ET.iterparse(io.StringIO(xml), events=["start-ns"])
        }
        # Register every collected namespace globally, with "_" appended to
        # its prefix.
        for prefix, uri in namespaces.items():
            ET.register_namespace(prefix + "_", uri)
        return namespaces
class TextgridCRUD:
    """
    Provide access to the Textgrid CRUD Service.
    """

    def __init__(self):
        self._config = TextgridConfig()

    def get_data(self, sid: str, textgrid_uri: str) -> bytes:
        """
        Get resource data.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response content
        :rtype: bytes
        """
        # Accessing `.content` downloads the deferred message body.
        return self._get_resource(sid, textgrid_uri).content

    def get_metadata(self, sid: str, textgrid_uri: str):
        """
        Get the content type and content length reported for a resource.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: A dictionary with the keys "content-type" and "content-length"
        :rtype: dict
        """
        # currently unused
        response = self._get_resource(sid, textgrid_uri)
        try:
            return self._process_response(response)
        finally:
            # The request was made with stream=True and the body is never
            # read here, so the connection has to be released explicitly.
            response.close()

    def _get_resource(self, sid: str, textgrid_uri: str) -> "requests.Response":
        """
        Helper function to get the response headers of a requested resource
        and defer the download of the message body.

        :param sid: Session ID
        :type sid: str
        :param textgrid_uri: Textgrid URI
        :type textgrid_uri: str
        :return: Response object
        :rtype: requests.Response
        """
        uri = self._config.host + DEFAULT_API_VERSION + lookup_api_path("crud")
        # defer downloading the response body until accessing Response.content
        # The session ID goes through `params` so that it is percent-encoded
        # rather than spliced verbatim into the URL.
        # TODO implement error handling on http status not in 200..299
        return requests.get(
            uri + textgrid_uri + "/data",
            params={"sessionId": sid},
            stream=True,
        )

    @staticmethod
    def _process_response(response: "requests.Response") -> dict:
        """
        Extract content type and content length from the response headers.

        :param response: Response of the Textgrid CRUD Service
        :type response: requests.Response
        :return: A dictionary with the keys "content-type" (header value or
            None) and "content-length" (int, 0 if the header is missing or empty)
        :rtype: dict
        """
        # only called by `get_metadata` => currently unused
        headers = response.headers
        return {
            "content-type": headers.get("content-type"),
            "content-length": int(headers.get("content-length") or 0),
        }
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment