diff --git a/CHANGELOG.md b/CHANGELOG.md index 5131bcba7eded9035a54cd4928aa42f71c850365..b085ed73408a3fb9c5c0739f97f3782ee6ae1248 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.1.0] - 2024-03-27 + +### Added + +- tgsearch client for the `/search` endpoint + +### Changed + +- moving away from tgcrud HEAD requests to tgsearch requests for identifying images during the publish process + ## [2.0.0] - 2023-12-11 ### Changed diff --git a/README.md b/README.md index 7370bff3d62d1abbd73d472c7ca614cc6764bb92..c7839888ec80d7a5003cef269fec57d0cbd7cdce 100644 --- a/README.md +++ b/README.md @@ -8,22 +8,23 @@ A simple application for transferring data from the TextGridLab to eXist-db. <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE --> ## Table of Contents -* [TextGrid Connect Standalone](#textgrid-connect-standalone) - * [What it does](#what-it-does) - * [What it doesn't do](#what-it-doesnt-do) - * [Getting Started](#getting-started) - * [Prerequisites](#prerequisites) - * [Local development with Docker](#local-development-with-docker) - * [Local development without Docker](#local-development-without-docker) - * [Installing](#installing) - * [Build the xar](#build-the-xar) - * [Get it from DARIAH-DE's eXist repository](#get-it-from-dariah-des-exist-repository) - * [Using the module within TextGrid](#using-the-module-within-textgrid) - * [Running the tests](#running-the-tests) - * [Versioning](#versioning) - * [Authors](#authors) - * [License](#license) - * [Acknowledgments](#acknowledgments) +- [TextGrid Connect Standalone](#textgrid-connect-standalone) + - [Table of Contents](#table-of-contents) + - [What it does](#what-it-does) + - [What it doesn't do](#what-it-doesnt-do) + - [Getting
Started](#getting-started) + - [Prerequisites](#prerequisites) + - [Local development with Docker](#local-development-with-docker) + - [Local development without Docker](#local-development-without-docker) + - [Installing](#installing) + - [Build the xar](#build-the-xar) + - [Get it from DARIAH-DE's eXist repository](#get-it-from-dariah-des-exist-repository) + - [Using the module within TextGrid](#using-the-module-within-textgrid) + - [Running the tests](#running-the-tests) + - [Versioning](#versioning) + - [Authors](#authors) + - [License](#license) + - [Acknowledgments](#acknowledgments) <!-- END doctoc generated TOC please keep comment here to allow auto update --> @@ -40,6 +41,8 @@ A simple application for transferring data from the TextGridLab to eXist-db. * pushing data to TextGrid * creating data on the TextGrid server +**Limitation: The recursive publish process can only deal with objects from a single project. Images are looked up with a tgsearch filter on the `project.id` of the object being published; check where `project.id` is used in the code.** + ## Getting Started These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. @@ -135,7 +138,7 @@ For the versions available, see the [tags on this repository](https://gitlab.gwd ## Authors * **Michelle Weidling** - *Setting up the standalone version* - [mrodzis](https://gitlab.gwdg.de/mrodzis) -* **Mathias Göbel** - [mgoebel](https://gitlab.gwdg.de/mgoebel) +* **Mathias Göbel** - *Nothing but placing his name here* - [mgoebel](https://gitlab.gwdg.de/mgoebel) See also the list of [contributors](https://gitlab.gwdg.de/SADE/textgrid-connect-standalone/-/graphs/develop) who participated in this project. 
diff --git a/expath-pkg.xml b/expath-pkg.xml
index ec01eac96cd8fdb0c3c2c1a1e7bd26115e5e5f74..3ee89af04a206844cafceff0fb31d2377601bba0 100644
--- a/expath-pkg.xml
+++ b/expath-pkg.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <package xmlns="http://expath.org/ns/pkg" name="http://sub.uni-goettingen.de/tg-connect-standalone"
     abbrev="tg-connect-standalone"
-    version="2.0.0"
+    version="2.1.0"
     spec="1.0">
     <title>TextGrid Connect</title>
     <dependency processor="http://exist-db.org" semver-min="6.0.1" />
diff --git a/modules/client.xqm b/modules/client.xqm
index 254ee2b660f32600870043aecc5530bb5b9eba72..1b981568ccc8c74feedbcc30fa8b363cfcab66c6 100644
--- a/modules/client.xqm
+++ b/modules/client.xqm
@@ -192,3 +192,72 @@ declare function client:remove-prefix($tguri as xs:string) as xs:string {
     else
         $tguri
 };
+
+(:~
+ : tg-search query function: sends one GET request to `{$tgsearch-url}/search`
+ : and returns the root element of the response body.
+ :
+ : NOTE(review): this function uses the `local:` prefix inside a library
+ : module; strict XQuery processors reject that (XQST0048) - confirm that the
+ : targeted eXist-db version accepts it.
+ :
+ : @param $tgsearch-url endpoint address, without the trailing `/search`
+ : @param $query the query string; it is URI-escaped here
+ : @param $limit value of the `limit` request parameter
+ : @param $start value of the `start` request parameter
+ : @param $filters ready-made request parameters, appended as they are
+ : @param $sessionId session id sent as `sid`; left out when empty
+ : @return the root element of the response body
+ :)
+declare %private function local:tgsearch-request(
+    $tgsearch-url as xs:anyURI,
+    $query as xs:string,
+    $limit as xs:nonNegativeInteger,
+    $start as xs:nonNegativeInteger,
+    $filters as xs:string*,
+    $sessionId as xs:string?)
+as element(tgs:response) {
+    let $params := (
+        "q=" || encode-for-uri($query),
+        "limit=" || $limit,
+        "start=" || $start,
+        (: send `sid` only when a session id is given, escaped like the query :)
+        $sessionId[. ne ""] ! ("sid=" || encode-for-uri(.)),
+        $filters
+    )
+
+    (: `&` has to be written as `&amp;` inside an XQuery string literal :)
+    let $get-params := string-join($params, "&amp;")
+    let $request := <hc:request method="get" href="{$tgsearch-url}/search?{$get-params}" />
+    (: item 1 of the result describes the response, item 2 is its body :)
+    let $response := hc:send-request($request)[2]
+
+    return
+        $response/*
+};
+
+(:~
+ : tg-search client: /search
+ : @param $tgsearch-url: endpoint address
+ : @param $query: query according to lucene syntax
+ : @param $limit: number of results returned (per page)
+ : @param $start: zero based offset to start listing hits
+ : @param $filters: filter to apply to the query, incl. the `filter=` param name
+ : @param $sessionId: a valid textgrid sessionId to query nonpublic resources.
+ :)
+declare function client:tgsearch-search(
+    $tgsearch-url as xs:anyURI,
+    $query as xs:string,
+    $limit as xs:nonNegativeInteger?,
+    $start as xs:nonNegativeInteger?,
+    $filters as xs:string*,
+    $sessionId as xs:string?)
+as element(tgs:result)*
+{
+    (: Defaults: 50 hits per page, offset 0, and `*` for an empty query.
+       The casts keep the values usable where xs:nonNegativeInteger is declared. :)
+    let $page-size := xs:nonNegativeInteger(($limit, 50)[1])
+    let $offset := xs:nonNegativeInteger(($start, 0)[1])
+    let $lucene-query := if ($query ne "") then $query else "*"
+
+    let $first-page := local:tgsearch-request($tgsearch-url, $lucene-query, $page-size, $offset, $filters, $sessionId)
+    (: assumes @hits is the total number of matches, not the size of this
+       page - TODO confirm against the tgsearch response format :)
+    let $total-hits := xs:nonNegativeInteger($first-page/@hits)
+
+    (: Number of pages to request AFTER the first one. It is 0 when @hits is
+       missing, when the first page already reaches the last hit, or when the
+       page size is 0 (which would otherwise be a division by zero). :)
+    let $further-pages :=
+        if (empty($total-hits) or $page-size eq 0 or $total-hits le $offset + $page-size)
+        then 0
+        else ($total-hits - $offset - 1) idiv $page-size
+
+    let $responses := (
+        $first-page, (: the first request is not repeated :)
+        for $page in 1 to $further-pages
+        (: continue from the requested offset instead of restarting at 0 :)
+        let $page-start := xs:nonNegativeInteger($offset + $page * $page-size)
+        return
+            local:tgsearch-request($tgsearch-url, $lucene-query, $page-size, $page-start, $filters, $sessionId)
+    )
+    return
+        $responses//tgs:result
+};
\ No newline at end of file
diff --git a/modules/connect.xqm b/modules/connect.xqm
index f52167738c4117c9525723f1781a1ae489eaccf7..0c52b99dd31c33a85b2962b49d629810310acb26 100644
--- a/modules/connect.xqm
+++ b/modules/connect.xqm
@@ -35,36 +35,45 @@ as item()+ {
             true()
         else
             connect:is-user-authorized($user, $password)
-    return if ($test-for-auth) then
-        let $tgcrud-url := config:get-value-from-configfile("textgrid.tgcrud")
-        let $metadataContainer := client:get-metadata($uri, $tgcrud-url, $sid)
-        let $tguri := $metadataContainer//tgmd:textgridUri/string()
-        let $rdfstoreUrl := connect:get-server-url($metadataContainer)
-        let $descendant-aggregated-uris :=
-            if($recursive) then
-                client:get-latest-aggregated-uris($tguri, $rdfstoreUrl, $sid)
-            else
-                $tguri
-        let $image-uris := connect:get-image-uris($descendant-aggregated-uris, $tgcrud-url, $sid)
-        let $store-images := connect:store-images($image-uris, $tgcrud-url, $sid)
-        let $non-image-uris := connect:get-non-image-uris($descendant-aggregated-uris,
$image-uris) - - return - (: handle the non-images :) - for $public-uri in $non-image-uris return - let $data-request-objects := client:get-data-request-objects($public-uri, $tgcrud-url, $sid) - let $header := $data-request-objects[1] - let $data-file := $data-request-objects[2] - return - if (connect:is-resource-available($header)) then - connect:process-data($public-uri, $tgcrud-url, $sid, $tguri, $metadataContainer, $data-file, $strictValidation) - else - "Resource " || $public-uri || " not found." - else - error( - QName("https://sade.textgrid.de/ns/error", "PUBLISH02"), - "error authenticating for " || $user || " on " || $config:data-root - ) + return + if (not($test-for-auth)) + then (error( + QName("https://sade.textgrid.de/ns/error", "PUBLISH02"), + "error authenticating for " || $user || " on " || $config:data-root + )) + else + let $tgcrud-url := config:get-value-from-configfile("textgrid.tgcrud") + let $metadataContainer := client:get-metadata($uri, $tgcrud-url, $sid) + let $tguri := $metadataContainer//tgmd:textgridUri/string() + let $rdfstoreUrl := connect:get-server-url($metadataContainer) + let $descendant-aggregated-uris := + if($recursive) then + client:get-latest-aggregated-uris($tguri, $rdfstoreUrl, $sid) + else + $tguri + let $image-uris := + connect:get-image-uris-with-tgsearch( + $descendant-aggregated-uris, + "filter=project.id:" || string($metadataContainer//tgmd:project/@id), + $sid + ) + + let $store-images := connect:store-images($image-uris, $tgcrud-url, $sid) + let $non-image-uris := connect:get-non-image-uris($descendant-aggregated-uris, $image-uris) + let $number-of-items-to-publish := count($non-image-uris) + let $log := util:log-system-out("got a total of " || $number-of-items-to-publish || " non-image URIs to publish.") + return + (: handle the non-images :) + for $public-uri at $pos in $non-image-uris return + let $log := util:log-system-out($pos || "/" || $number-of-items-to-publish) + let $data-request-objects := 
client:get-data-request-objects($public-uri, $tgcrud-url, $sid)
+                let $header := $data-request-objects[1]
+                let $data-file := $data-request-objects[2]
+                return
+                    if (connect:is-resource-available($header)) then
+                        connect:process-data($public-uri, $tgcrud-url, $sid, $tguri, $metadataContainer, $data-file, $strictValidation)
+                    else
+                        "Resource " || $public-uri || " not found."
 };
 
 (:~ The main function for the publisher.
@@ -109,7 +118,9 @@ as item()+ {
     connect:publish($uri, $sid, $user, $password, true(), true())
 };
 
-
+(:~
+ : @deprecated connect:publish calls connect:get-image-uris-with-tgsearch instead
+ :)
 declare function connect:get-image-uris($descendant-aggregated-uris as xs:string+,
     $tgcrud-url as xs:string,
     $sid as xs:string)
@@ -125,6 +136,42 @@ as xs:string* {
         ($newUris, $presentUris[. ne ""])
 };
 
+(:~
+ : Picks the image URIs out of the given URIs. Images are identified with a
+ : tgsearch query for `format:"image/*"`; URIs that match an entry of
+ : `images.xml` below the data root are reported as images as well.
+ :
+ : @param $descendant-aggregated-uris the URIs to classify
+ : @param $filters tgsearch filter parameters, each incl. the `filter=` param name
+ : @param $sid session id handed to the tgsearch client
+ : @return the given URIs found by the image query, followed by the matching
+ :   `images.xml` entries (part of `@uri` before the first `.`) not returned yet
+ :)
+declare function connect:get-image-uris-with-tgsearch(
+    $descendant-aggregated-uris as xs:string+,
+    $filters as xs:string+,
+    $sid as xs:string)
+as xs:string* {
+    let $tgsearch-url := config:get-value-from-configfile("textgrid.nonpublic") => xs:anyURI()
+    let $hits :=
+        client:tgsearch-search(
+            $tgsearch-url,
+            "format:""image/*""",
+            100,
+            0,
+            $filters,
+            $sid) (: returns element(tgs:result)* :)
+    let $image-uris := $hits//tgmd:textgridUri/text()
+    (: NOTE(review): this compares the full textgridUri text, while the images.xml lookup cuts @uri at the first "." - confirm both use the same URI form :)
+    let $intersection := $descendant-aggregated-uris[. = $image-uris]
+    let $presentUris := doc($config:data-root || "/images.xml")
+        //image[substring-before(@uri, ".") = $descendant-aggregated-uris]
+        /substring-before(@uri, ".")
+    return
+        (: a URI found by both lookups is returned only once :)
+        ($intersection, $presentUris[. ne ""][not(. = $intersection)])
+};
+
 declare function connect:get-non-image-uris($descendant-aggregated-uris as xs:string+,
     $image-uris as xs:string*)
 as xs:string+ {