From ba712709f8a173fda16ee2a6810f59f535a4d254 Mon Sep 17 00:00:00 2001 From: Uwe Sikora <sikora@sub.uni-goettingen.de> Date: Thu, 30 Nov 2017 11:44:36 +0100 Subject: [PATCH] major changes: handling multiple tei:app-hierarchies --- .gitignore | 2 + README.md | 25 +- arokis.xqm | 61 -- bdn_IF.xquery | 495 ------------- bdnprint.xqm | 310 -------- functx.xqm | 50 -- .../intermediate_formate/inter_form.xqm | 666 ++++++++++++++++++ string.xqm => stable/modules/string.xqm | 5 +- stable/rest/intermediate_format.xql | 16 + 9 files changed, 710 insertions(+), 920 deletions(-) create mode 100644 .gitignore delete mode 100644 arokis.xqm delete mode 100644 bdn_IF.xquery delete mode 100644 bdnprint.xqm delete mode 100644 functx.xqm create mode 100644 stable/modules/intermediate_formate/inter_form.xqm rename string.xqm => stable/modules/string.xqm (91%) create mode 100644 stable/rest/intermediate_format.xql diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bef899f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/archive +/xml diff --git a/README.md b/README.md index 7cf5a80..050f901 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,23 @@ -# bdnprint_if -Intermediate Format +# bdn:IntermediateFormat +Scripts to convert bdn-TEI into an intermediate-format dealing with reading markers + +# setup and description +## stable/modules/intermediate_format/inter_form.xqm + - This is the main module integrating the conversion functions + - place it in your app modules path: "/modules/intermediate_format/inter_form.xqm" + +## stable/modules/string.xqm + - This is a helper module dealing with strings + - place it in your app modules path: "/modules/string.xqm" + +## stable/rest/intermediate_format.xql + - This is a conversion script running the conversion on a given document + - place it in your app somewhere or as suggested here: "/rest/intermediate_format.xql" + +# running the conversion + - call intermediate_format.xql via REST with the GET-Parameter 
"path" + - "path" must be an XML-URI existing in your app context (There is no existence check yet) + - wait + +# Sample call +http://localhost:8080/exist/rest/apps/bdn/rest/intermediate_format.xql?path=/db/apps/bdn/data/samples/griesbach_full.xml diff --git a/arokis.xqm b/arokis.xqm deleted file mode 100644 index 1abc6e0..0000000 --- a/arokis.xqm +++ /dev/null @@ -1,61 +0,0 @@ -xquery version "1.0"; -module namespace arokis="http://www.arokis.com/xquery/libs/bdn/general"; -declare default element namespace "http://www.tei-c.org/ns/1.0"; - - -declare namespace functx = "http://www.functx.com"; -import module "http://www.functx.com" at "functx.xqm"; - - -(:~ - : arokis:are-nodes-in-sequence() - : This function checks if a node() from a given nodeset is or contains named Elements in a sequence. - : In this case it returns 'true' else 'false' - : - : @param $nodes the nodes() to check for BLEs - : @param $bleElements a list of defined BLEs - : @return xs:boolean ('true' else 'false') - : - : @version 1.1 (2017-09-22) - : @status working - : @author Uwe Sikora - :) -declare function arokis:are-nodes-in-sequence - ($nodes as node()*, $sequence as item()*) as xs:boolean{ - - some $node in $nodes - satisfies - if(functx:is-value-in-sequence($node/name(), $sequence)) then( - fn:true() - ) - - else ( - fn:false() - ) -}; - - - -declare function arokis:first-save-node-not-in-sequence - ($nodes as node()*, $sequence as item()*) { - - functx:first-node( - $nodes - [not( self::text() and normalize-space(.) = '' )] - [not( arokis:are-nodes-in-sequence(descendant-or-self::node(), $sequence) )] - ) - -}; - - - -declare function arokis:last-save-node-not-in-sequence - ($nodes as node()*, $sequence as item()*) { - - functx:last-node( - $nodes - [not( self::text() and normalize-space(.) 
= '' )] - [not( arokis:are-nodes-in-sequence(descendant-or-self::node(), $sequence) )] - ) -}; - diff --git a/bdn_IF.xquery b/bdn_IF.xquery deleted file mode 100644 index 759264d..0000000 --- a/bdn_IF.xquery +++ /dev/null @@ -1,495 +0,0 @@ -(:~ - : This module is used to create an intermediate format (nms: "intfo") to handle the complexity of the tei-data of - : the DFG-project "Bibliothek der Neologie" and to provide a format that can be - : directly ingested into the BdN-print workflow. - : - : The intermediate formats purpose is to reduce complexity with regard to following aspects: - : - text() is handled in the manner of whitespace preservation - : - tei:rdg is expanded as rdgMarkers to cope with BLE (BlockLevel Elements) - : - : NEW Elements and attributes from Intermediate Format: - : - rdgMarker [ @wit="<wit from rdg>" , @ref="<ref to rdg>" , @mark="open|close" , @type="v|pp|pt|ppl|ptl", @context="lem|rdg" ] - : - aligned [ @type="right-aligned|center-aligned" ] - : - tei:lem [ @omit="true" ] - : - : - : @author Uwe Sikora - : @version 1.2 (2017-09-15) - :) - -declare default element namespace "http://www.tei-c.org/ns/1.0"; -declare namespace saxon="http://saxon.sf.net/"; -declare namespace intfo = "http://www.bdn-edition.de/bdnPrint/intermediate_format"; - - -declare namespace functx = "http://www.functx.com"; -import module "http://www.functx.com" at "functx.xqm"; - -declare namespace arokis = "http://www.arokis.com/xquery/libs/bdn/general"; -import module "http://www.arokis.com/xquery/libs/bdn/general" at "arokis.xqm"; - -declare namespace string="http://www.arokis.com/xquery/libs/string"; -import module "http://www.arokis.com/xquery/libs/string" at "string.xqm"; - -declare namespace bdnprint="http://www.arokis.com/xquery/libs/bdn/print"; -import module "http://www.arokis.com/xquery/libs/bdn/print" at "bdnprint.xqm"; - - -(:declare option saxon:output "indent=no";:) - - -(:~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~:) -(: OWN Lib :) 
-(:~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~:) - -(:######################################################:) -(:################### BLE - Handling ###################:) - -(:~ - : BLE definition variable - : Array of elements that are BLE or elements that should be handelt as BLE - : - : @version 1.0 (2017-09-13) - : @author Uwe Sikora - :) -declare variable $blockLevelElements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note'); - - -(:~ - : intfo:marker() - Marker Constructor - : Constructor function whch creates the marker element with name, mark-type and references - : - : @param $name The name of the marker element - : @param $mark The mark type e.g. open or close - : @param $rdg_node The node which is marked - : @return element() the marker element - : - : @version 1.1 (2017-09-13) - : @author Uwe Sikora - :) -declare function intfo:marker - ($name as xs:string, $mark as xs:string, $rdg_node as node()) as element(){ - - let $marker_name := $name - let $marker_mark := $mark - let $marker_rdg_type := data($rdg_node/@type) - let $marker_rdg_ref := data($rdg_node/@id) - let $marker_rdg_wit := replace(data($rdg_node/@wit), '#', '') - return ( - element {$marker_name} { - (:attribute bdnp_parent {$node/parent::node()/name()}, :) - attribute wit {$marker_rdg_wit}, - attribute ref {$marker_rdg_ref}, - attribute mark {$marker_mark}, - attribute type {$marker_rdg_type}, - attribute context {data($rdg_node/@context)} - } - ) -}; - - -(:~ - : intfo:buildMarkers() - : constructs rdgMarker elements from set of tei:rdg nodes - : - : @param $type The type of the marker element - : @param nodes A set of tei:rdg elements - : @return rdgMarker element()s for each rdg in the set - : - : @version 1.1 (2017-09-13) - : @author Uwe Sikora - :) -declare function intfo:buildMarkers - ($type as xs:string, $nodes as node()*) as item()* { - - for $node in $nodes - return - intfo:marker('rdgMarker', $type, $node) -}; - 
- -(:~ - : intfo:firstNonBleNodeId() - : This recursive function determines the id of the last NON-BLE - : - starts on the first or the last node() in depth given as first argument - : - walks up the tree by parent:node() and looks if it is BLE - : TRUE: it returns the id of the last NON-BLE - : FALSE: it goes to the next parent:node() - : - : @param $node The node to check if BLE OR NON-BLE - : @return $id The ID of the last NON-BLE - : - : @version 1.1 - : @author Uwe Sikora - : - : deprecated version: - : declare function intfo:lastNonBLE - ($node as node()) as xs:string { - - if (functx:is-value-in-sequence($node/parent::node()/name(), $blockLevelElements)) then( - $node/@id - (\:fn:generate-id($node):\) - ) else( - intfo:lastNonBLE($node/parent::node()) - ) - }; - :) -declare function intfo:firstNonBleNodeId - ($node as node()) as item() { - - if (functx:is-value-in-sequence($node/parent::node()/name(), $blockLevelElements)) then( - fn:generate-id($node) - ) - - else if ($node[parent::node()[not(parent::node())]]) then ( - fn:generate-id($node) - ) - - else ( - intfo:firstNonBleNodeId($node/parent::node()) - ) -}; - - -(:~ - : intfo:expanReadings() - : recursive function to run the rdgMarker Transformation - : - : @param $node The treestructure to transform - : @return node() The templated node() for each defined element - : - : @version 1.0 (2017-09-13) - : @author Uwe Sikora - :) -declare function intfo:expanReadings - ($nodes as node()*) as item()* { - - for $node in $nodes - return - typeswitch($node) - case text() return $node - - case comment() return $node - - case element(lem) return ( - element {name($node)} { - $node/@*, - intfo:evaluateElementForBLE($node) - } - ) - - case element(rdg) return ( - if ($node[@type = 'ppl'] or $node[@type = 'ptl']) then ( - element {name($node)} { - $node/@*, - intfo:evaluateElementForBLE($node) - } - ) - else ( - element {name($node)} { - $node/@*, - intfo:expanReadings($node/node()) - } - ) - ) - - default return ( - 
element { name($node) } { - $node/@*, - intfo:expanReadings($node/node()) - } - ) -}; - - -(:~ - : intfo:identifyReadings() - : This function identifies all the readings of interest and builds a reading model - : used to buil the markers later on - : - : @param $node The node for which rdg nodes() should be identified. These can be tei:lem or tei:rdg - : @return the reading model which contains all readings of interest including metadata but excluding their content - : - : @version 1.1 (2017-09-14) - : @author Uwe Sikora - :) -declare function intfo:identifyReadings - ($node as node()*) as item()* { - - let $readings := ( - if ($node[self::lem]) then ( - $node/following-sibling::rdg - ) - else if ($node[self::rdg]) then ( - $node - ) - else () - ) - - let $out := ( - for $reading in $readings - return - element { name($reading) } { - $reading/@*, - attribute {"context"}{name($node)} - } - ) - - return $out -}; - - -(:~ - : intfo:mergeReadings() - : This function merges all readings in the given set sharing the same tei:rdg[@type] - : If no type was provided 'none' is set as type - : - : @param $readings the readings as a sequence - : @return $node the merged readings - : - : @version 1.0 (2017-09-14) - : @author Uwe Sikora - :) -declare function intfo:mergeReadings - ($readings as node()*) as item()* { - - let $targets := ( - for $reading in $readings - return - if ($reading[@type]) then ( - $reading - ) - else ( - element { name($reading) } { - $reading/@*, - attribute type {'none'} - } - ) - ) - - return ( - for $type in distinct-values($targets/@type) - let $rdgs := $targets[@type = $type] - return - element {"rdg"}{ - attribute wit {$rdgs/@wit}, - attribute id {$rdgs/@id}, - attribute context {distinct-values($rdgs/@context)}, - attribute type {$type} - } - ) -}; - - -(:~ - : intfo:evaluateElementForBLE() - : This function evaluates the position of the first and last save node() [a node() that is not and does not contain a BLE], - : builds a target model, which 
is then evaluated with the tree and - : finally given both to setMarksInElement() to serialise the converted structure in the main tree - : - : @param $node The node to process and check for marks, mainly tei:lem and tei:rdg - : @return the converted node-set from setMarksInElement() - : - : @version 1.1 (2017-09-14) - : @author Uwe Sikora - :) -declare function intfo:evaluateElementForBLE - ($node as node()*) as item()* { - - let $new_tree := <tree>{$node/node()}</tree> - - let $firstSaveNode := arokis:first-save-node-not-in-sequence($new_tree//node(), $blockLevelElements) - let $lastSaveNode := arokis:last-save-node-not-in-sequence($new_tree//node(), $blockLevelElements) - - return - if( not( empty($firstSaveNode) and empty($lastSaveNode) ) ) then ( - - let $targets := ( - element {"targets"} { - - element {"open"} { - attribute id {intfo:firstNonBleNodeId($firstSaveNode)} - }, - - element {"close"} { - attribute id {intfo:firstNonBleNodeId($lastSaveNode)} - }, - - element {'readings'} { - (:intfo:identifyReadings($node):) - intfo:mergeReadings(intfo:identifyReadings($node)) - } - } - ) - - (: Target Model: - <targets> - <open id="[ID]" /> - <close id ="[ID]" /> - <readings> - <rdg ... /> - <rdg ... /> - ... - </readings> - </targets> - :) - - return ( -(: "
FIRST: ", fn:generate-id($firstSaveNode), "
FIRST NODE: ", intfo:lastNonBLE($firstSaveNode), " 
LAST: ", fn:generate-id($lastSaveNode), " LAST NODE: ", intfo:lastNonBLE($lastSaveNode):) -(: "
FIRST: ", fn:generate-id($firstTextNode), " LAST: ", $lastTextNode, " TREE: ", $new_tree, "
FIRST NODE: ", $first, " LAST NODE: ", $last:) - intfo:setMarksInElement($new_tree, $targets) - ) - ) - else( - attribute omit {'true'} - (:$node:) - ) -}; - - -(:~ - : THE function wich runs for the identified first and last target associated with the rdgMarker - : - The first and last Target is completed with markers - : - Every node that is not interesting gets copied - : - if there is a further rdg or lem in the tree it is processed as a new instance with intfo:transformReadings() - : - : @param $target_model The model of target nodes, that are the open/first node() and the close/last node() with ids and all readings which should be represented by markers - : @param $nodes The node-set getting processed - : @return the converted and finished serialised node() - : - : @version 1.1 (2017-09-14) - : @author Uwe Sikora - :) -declare function intfo:setMarksInElement - ($nodes as node()*, $target_model as node()) as item()* { - - for $node in $nodes - return - typeswitch($node) - case text() return ( - if (fn:generate-id($node) = data($target_model/open/@id) and fn:generate-id($node) = data($target_model/close/@id)) then ( - (:"CIRCUMFIXING: ",:)intfo:buildMarkers('open', $target_model/readings/node()), $node, intfo:buildMarkers('close', reverse($target_model/readings/node())) - ) - else if (fn:generate-id($node) = data($target_model/open/@id)) then ( - (:"FIRST: ",:)intfo:buildMarkers('open', $target_model/readings/node()), $node - ) - else if (fn:generate-id($node) = data($target_model/close/@id)) then ( - (:"LAST: ",:) $node, intfo:buildMarkers('close', reverse($target_model/readings/node())) - ) - else ( - (:"NOTHING: ",:) $node - ) - ) - - case comment() return ($node) - - case element(lem) return ( - intfo:expanReadings($node) - ) - - case element(rdg) return ( - intfo:expanReadings($node) - ) - - case element(tree) return ( - intfo:setMarksInElement($node/node(), $target_model) - ) - - default return ( - if (fn:generate-id($node) = data($target_model/open/@id) 
and fn:generate-id($node) = data($target_model/close/@id)) then ( - (:"CIRCUMFIXING: ",:) - intfo:buildMarkers('open', $target_model/readings/node()), - element { name($node) } { - $node/@*, - intfo:setMarksInElement($node/node(), $target_model) - }, - intfo:buildMarkers('close', reverse($target_model/readings/node())) - ) - - else if (fn:generate-id($node) = data($target_model/open/@id)) then ( - (:"FIRST: ",:) - intfo:buildMarkers('open', $target_model/readings/node()), - element { name($node) } { - $node/@*, - intfo:setMarksInElement($node/node(), $target_model) - } - ) - - else if (fn:generate-id($node) = data($target_model/close/@id)) then ( - (:"LAST: ",:) - element { name($node) } { - $node/@*, - intfo:setMarksInElement($node/node(), $target_model) - }, - intfo:buildMarkers('close', reverse($target_model/readings/node())) - - ) - - else((:"NOTHING: ", :) - element { name($node) } { - $node/@*, - intfo:setMarksInElement($node/node(), $target_model) - } - ) - ) -}; - - -(:################# END BLE - Handling #################:) -(:######################################################:) - -(: *** :) - -(:~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~:) -(: WORKFLOW :) -(:~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~:) - -let $doc := . 
-let $preprocessed := bdnprint:preprocessing($doc/TEI) -let $readingMarkers := intfo:expanReadings($preprocessed) -let $postprocessed := bdnprint:postprocessing($readingMarkers, '🦄') - -return - $postprocessed - (:intfo:cleanUp($readingMarkers, '぀'):) - (:intfo:transformReadings($doc/TEI):) - (:let $target := ( - <targets> - <open id="1" /> - <close id ="2" /> - <readings> - <rdg type="v" wit="a" id="r1"/> - <rdg type="v" wit="b" id="r23" /> - <rdg type="om" wit="c" id="r51" /> - <rdg wit="d" id="r514" /> - </readings> - </targets> - ) - return - intfo:mergeReadings($target/readings/node()):) - -(: let $target := ( - <div> - <readings> - <note> - <milestone/> - <seg><hi><t>u</t></hi></seg> - <p>das ist</p> <t>keine</t> dritte - <list> - <item>Liste<ptr/></item> - </list> - </note> - </readings> - <note> - <rdg type="v" wit="a" id="r1"/> - <rdg type="v" wit="b" id="r23" /> - <rdg type="om" wit="c" id="r51" /> - <rdg wit="d" id="r514" > - zhgjghjgh - <u>jhgjghg</u> - </rdg> - </note> - </div> - ) - return - intfo:BLEcheck($target/readings[1]/note/seg, $blockLevelElements):) \ No newline at end of file diff --git a/bdnprint.xqm b/bdnprint.xqm deleted file mode 100644 index 45b8b6e..0000000 --- a/bdnprint.xqm +++ /dev/null @@ -1,310 +0,0 @@ -xquery version "1.0"; -module namespace bdnprint="http://www.arokis.com/xquery/libs/bdn/print"; -declare default element namespace "http://www.tei-c.org/ns/1.0"; - -declare namespace string="http://www.arokis.com/xquery/libs/string"; -import module "http://www.arokis.com/xquery/libs/string" at "string.xqm"; - - - -(:~ - : bdnprint:preservedText() - : This function preserves whitespace in a test-node by replacing 1-N Whitespacecharacters - : by one defined preservation character - : - : @version 1.0 (2017-09-14) - : @author Uwe Sikora - :) -(:declare function bdnprint:preservedText - ($text as node(), $escape as xs:string) as item()* { - - if ( - normalize-space($text) != '' or - $text - [ self::node() = ' '] - 
[preceding-sibling::node()[not(self::node() = text())]] - [following-sibling::node()[not(self::node() = text())]] - - ) then ( - let $t := replace($text, '[\s]+', $escape) - return - $t - - ) else ($text) -};:) - - -(:~ - : @version 1.0 (2017-09-13) - : @deprecated used by deprecaded bdnble:id() - : @author Uwe Sikora - :) -declare function bdnprint:pos - ($x) { - - let $parent := $x/.. - for $child at $p in $parent/* - return (if ($child is $x) then $p else ()) -}; - - -(:~ - : @version 1.0 (2017-09-13) - : @deprecated replaced with fn:generate-id() - : @author Uwe Sikora - :) -declare function bdnprint:id - ($x) { - - string-join(for $n in ($x/ancestor::node(),$x) return string(bdnprint:pos($n)), "/") -}; - -(:~ - : bdnprint:indent() - : identifies wich kind of indentation is needed for tei:p etc. - : - : XPATH: identifies the first preceding node() not self:text or blank-node - : //body//p[preceding::node() - : [ not( self::text()) ] - : [ not( normalize-space(self::text()) != '')][1] - : /name() = 'note'] - : - : Idea: if the first non text or blank node() is a specific node than a specific indentation needs to be assigned - : - : - : @version 0.1 (2017-09-20) - : @author Uwe Sikora - :) -declare function bdnprint:indent - ($nodes as node()) as item()* { - - -}; - - -(:~ - : bdnprint:preprocessing() - : This function is used to preprocess the bdn-tei - : - : single whitespace between to node()[not(self::text())]: //text()[ self::node() = ' '][preceding-sibling::node()[not(self::node() = text())]][following-sibling::node()[not(self::node() = text())]] - : //textNode[preceding::textNode[1][@preserved]] - : - : @version 1.2 (2017-09-14) - : @author Uwe Sikora - :) -declare function bdnprint:preprocessing - ($nodes as node()*) as item()* { - - for $node in $nodes - return - typeswitch($node) - case text() return ( - (: This is absolutly magical! 
"May Her Hooves Never Be Shod":) - if ( - normalize-space($node) != '' or - $node - [ self::node() = ' '] - [preceding-sibling::node()[not(self::node() = text())]] - [following-sibling::node()[not(self::node() = text())]] - - ) then ( - string:escape-whitespace($node, '🦄') - - ) else ($node) - - (:bdnprint:preservedText($node, '🦄'):) - ) - - (: COMPLETE IGNORE :) - case comment() return ((:$node:)) - - case element(encodingDesc) return ( - bdnprint:preprocessing($node/following-sibling::node()[1]) - ) - - case element(revisionDesc) return ( - bdnprint:preprocessing($node/following-sibling::node()[1]) - ) - - case element(ptr) return ( - bdnprint:preprocessing($node/node()) - ) - - (: ELEMENT IGNORE :) - case element(choice) return ( - if ($node[child::expan and child::abbr]) then ( - bdnprint:preprocessing($node/abbr/node()) - ) - else ( - element {name($node)} { - $node/@*, - bdnprint:preprocessing($node/node()) - } - ) - ) - - case element(byline) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(docAuthor) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(persName) return ( - if ($node[ not (ancestor::index) ]) then ( - bdnprint:preprocessing($node/node()) - ) - else ( - element {name($node)} { - $node/@*, - bdnprint:preprocessing($node/node()) - } - ) - ) - - case element(docEdition) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(docImprint) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(docDate) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(ref) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(foreign) return ( - bdnprint:preprocessing($node/node()) - ) - - case element(div) return ( - if ($node[@type = 'section-group']) then ( - bdnprint:preprocessing($node/node()) - ) - else ( - element {name($node)} { - $node/@*, - bdnprint:preprocessing($node/node()) - } - ) - - ) - - (: CHANGE :) - case element(rdg) return ( - element {name($node)} 
{ - $node/@*, - attribute id {fn:generate-id($node)}, - bdnprint:preprocessing($node/node()) - } - ) - - case element(hi) return ( - if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then( - element {'aligned'} { - $node/@*, - bdnprint:preprocessing($node/node()) - } - ) - else ( - element {name($node)} { - $node/@*, - bdnprint:preprocessing($node/node()) - } - ) - ) - - case element(seg) return ( - if($node[@type = 'item']) then( - element {'item'} { - $node/@*[name() != 'type'], - bdnprint:preprocessing($node/node()) - } - ) - else if($node[@type = 'row']) then( - element {'row'} { - $node/@*[name() != 'type'], - bdnprint:preprocessing($node/node()) - } - ) - else ( - element {name($node)} { - $node/@*, - (:attribute id {fn:generate-id($node)},:) - bdnprint:preprocessing($node/node()) - } - ) - ) - - default return ( - element {name($node)} { - $node/@*, - (:attribute id {fn:generate-id($node)},:) - bdnprint:preprocessing($node/node()) - } - ) -}; - - -(:~ - : bdnprint:postprocessing() - : - reduces all text() with preservation character to get rid of all conversion related whitespaces - : - kicks out all rdgMarkers an tei:rdg nodes not wanted - : - : @version 1.1 (2017-09-18) - : @author Uwe Sikora - :) -declare function bdnprint:postprocessing - ($nodes as node()*, $escaped_whitespace as xs:string) as item()* { - - for $node in $nodes - return - typeswitch($node) - case text() return ( - let $norm := string:escape-and-normalize($node, $escaped_whitespace) - (:let $reduce_expression := concat('[', $escaped_whitespace, ']+') - let $save := replace(normalize-space($node), "[\s]+", $escaped_whitespace) - let $reduce := replace($save, $reduce_expression, $escaped_whitespace):) - return - replace($norm, $escaped_whitespace, ' ') - (:$norm:) - ) - - case comment() return $node - - case element(rdgMarker) return ( - if ($node[@type != 'var-structure']) then ( - element {name($node)} { - $node/@*, - bdnprint:postprocessing($node/node(), $escaped_whitespace) 
- } - ) else () - ) - - case element(rdg) return ( - if ($node[@type != 'var-structure']) then ( - element {name($node)} { - $node/@*, - bdnprint:postprocessing($node/node(), $escaped_whitespace) - } - ) else ( - element {name($node)} { - $node/@*, - bdnprint:postprocessing($node/following-sibling::node()[1], $escaped_whitespace) - } - ) - ) - - default return ( - element {name($node)} { - $node/@*, - bdnprint:postprocessing($node/node(), $escaped_whitespace) - } - ) -}; \ No newline at end of file diff --git a/functx.xqm b/functx.xqm deleted file mode 100644 index 0728bfb..0000000 --- a/functx.xqm +++ /dev/null @@ -1,50 +0,0 @@ -xquery version "1.0"; -module namespace functx = "http://www.functx.com"; - - -declare function functx:is-value-in-sequence - ( $value as xs:anyAtomicType? , - $seq as xs:anyAtomicType* ) as xs:boolean { - - $value = $seq -} ; - - -declare function functx:first-node - ( $nodes as node()* ) as node()? { - - ($nodes/.)[1] -} ; - - -declare function functx:last-node - ( $nodes as node()* ) as node()? { - - ($nodes/.)[last()] -} ; - - -declare function functx:index-of-node - ( $nodes as node()* , - $nodeToFind as node() ) as xs:integer* { - - for $seq in (1 to count($nodes)) - return $seq[$nodes[$seq] is $nodeToFind] -} ; - - -declare function functx:add-attributes - ( $elements as element()* , - $attrNames as xs:QName* , - $attrValues as xs:anyAtomicType* ) as element()? { - - for $element in $elements - return element { node-name($element)} - { for $attrName at $seq in $attrNames - return if ($element/@*[node-name(.) 
= $attrName]) - then () - else attribute {$attrName} - {$attrValues[$seq]}, - $element/@*, - $element/node() } -} ; \ No newline at end of file diff --git a/stable/modules/intermediate_formate/inter_form.xqm b/stable/modules/intermediate_formate/inter_form.xqm new file mode 100644 index 0000000..f570e75 --- /dev/null +++ b/stable/modules/intermediate_formate/inter_form.xqm @@ -0,0 +1,666 @@ +xquery version "3.1"; + +module namespace interform="xmldb:exist:///db/apps/bdn/modules/intermediate_format/inter_form.xqm"; + +import module namespace functx="http://www.functx.com"; +import module namespace string = "xmldb:exist:///db/apps/bdn/modules/bdn_string.xqm" at "xmldb:exist:///db/apps/bdn/modules/bdn_string.xqm"; +declare default element namespace "http://www.tei-c.org/ns/1.0"; + + + +(:~ + : interform:preprocessing() + : This function is used to preprocess the bdn-tei + : + : single whitespace between to node()[not(self::text())]: //text()[ self::node() = ' '][preceding-sibling::node()[not(self::node() = text())]][following-sibling::node()[not(self::node() = text())]] + : //textNode[preceding::textNode[1][@preserved]] + : + : @version 1.2 (2017-09-14) + : @author Uwe Sikora + :) +declare function interform:preprocessing + ($nodes as node()*) as item()* { + + for $node in $nodes + return + typeswitch($node) + case processing-instruction() return () + case text() return ( + (: This is absolutly magical! 
"May Her Hooves Never Be Shod":) + if ( + normalize-space($node) != '' or + $node + [ self::node() = ' '] + [preceding-sibling::node()[not(self::node() = text())]] + [following-sibling::node()[not(self::node() = text())]] + + ) then ( + element {"textNode"}{ + interform:set-id($node), + string:escape-whitespace($node, '🦄') + } + + ) else () + + (:interform:preservedText($node, '🦄'):) + ) + + (: COMPLETE IGNORE :) + case comment() return ((:$node:)) + + case element(encodingDesc) return ( + interform:preprocessing($node/following-sibling::node()[1]) + ) + + case element(revisionDesc) return ( + interform:preprocessing($node/following-sibling::node()[1]) + ) + + (:case element(ptr) return ( + interform:preprocessing($node/node()) + ):) + + (: ELEMENT IGNORE :) + (:case element(choice) return ( + if ($node[child::expan and child::abbr]) then ( + interform:preprocessing($node/abbr/node()) + ) + else ( + element {name($node)} { + $node/@*, + interform:preprocessing($node/node()) + } + ) + ):) + + case element(byline) return ( + interform:preprocessing($node/node()) + ) + + case element(docAuthor) return ( + interform:preprocessing($node/node()) + ) + + case element(persName) return ( + if ($node[ not (ancestor::index) ]) then ( + interform:preprocessing($node/node()) + ) + else ( + interform:preprocessing-default($node) + ) + ) + + case element(docEdition) return ( + interform:preprocessing($node/node()) + ) + + case element(docImprint) return ( + interform:preprocessing($node/node()) + ) + + case element(docDate) return ( + interform:preprocessing($node/node()) + ) + + (:case element(ref) return ( + interform:preprocessing($node/node()) + ):) + + (:case element(foreign) return ( + interform:preprocessing($node/node()) + ):) + + case element(div) return ( + if ($node[@type = 'section-group']) then ( + interform:preprocessing($node/node()) + ) + else ( + interform:preprocessing-default($node) + ) + + ) + + (: CHANGE :) + case element(rdg) return ( + 
interform:preprocessing-default($node) + ) + + case element(hi) return ( + if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then( + element {'aligned'} { + $node/@*, + interform:set-id($node), + interform:preprocessing($node/node()) + } + ) + else ( + interform:preprocessing-default($node) + ) + ) + + case element(seg) return ( + if($node[@type = 'item']) then( + element {'item'} { + $node/@*[name() != 'type'], + interform:set-id($node), + interform:preprocessing($node/node()) + } + ) + else if($node[@type = 'row']) then( + element {'row'} { + $node/@*[name() != 'type'], + interform:set-id($node), + interform:preprocessing($node/node()) + } + ) + else ( + interform:preprocessing-default($node) + ) + ) + + default return ( + interform:preprocessing-default($node) + ) +}; + +declare function interform:preprocessing-default + ($node as node()) as item()* { + + element {name($node)} { + $node/@*, + interform:set-id($node), + interform:preprocessing($node/node()) + } +}; + +declare function interform:set-id + ($node as node()) as item()* { + + if ($node/ancestor-or-self::app) then ( + attribute id {fn:generate-id($node)} + ) + else () +}; + +(:~ + : interform:postprocessing() + : - reduces all text() with preservation character to get rid of all conversion related whitespaces + : - kicks out all rdgMarkers an tei:rdg nodes not wanted + : + : @version 1.1 (2017-09-18) + : @author Uwe Sikora + :) +declare function interform:postprocessing + ($nodes as node()*, $escaped_whitespace as xs:string) as item()* { + + for $node in $nodes + return + typeswitch($node) + case text() return ( + let $norm := string:escape-and-normalize($node, $escaped_whitespace) + (:let $reduce_expression := concat('[', $escaped_whitespace, ']+') + let $save := replace(normalize-space($node), "[\s]+", $escaped_whitespace) + let $reduce := replace($save, $reduce_expression, $escaped_whitespace):) + return + replace($norm, $escaped_whitespace, ' ') + (:$norm:) + ) + + case comment() 
return $node + + case element(rdgMarker) return ( + if ($node[@type != 'var-structure']) then ( + element {name($node)} { + $node/@*, + interform:postprocessing($node/node(), $escaped_whitespace) + } + ) else () + ) + + case element(rdg) return ( + if ($node[@type != 'var-structure']) then ( + element {name($node)} { + $node/@*, + interform:postprocessing($node/node(), $escaped_whitespace) + } + ) else ( + element {name($node)} { + $node/@*, + interform:postprocessing($node/following-sibling::node()[1], $escaped_whitespace) + } + ) + ) + + default return ( + element {name($node)} { + $node/@*, + interform:postprocessing($node/node(), $escaped_whitespace) + } + ) +}; + +(: ############################################################################################################## + : Intermediate Format: Main Conversion routine + : ******************************************** + : + : - + : - + : ############################################################################################################## :) + +declare variable $interform:appElements := ('app', 'lem', 'rdg'); +declare variable $interform:blockLevelElements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note'); + +declare function interform:build-intermediate-format + ($nodes as node()*) as item()* { + + for $node in $nodes + let $preprocessing := interform:preprocessing($node) + let $app-index := interform:app-index($preprocessing) + let $map := interform:marker-targets($app-index) + return + interform:transform-map($preprocessing, $map) +}; + + +declare function interform:transform-map + ($nodes as node()*, $map) as item()* { + + for $node in $nodes + return + typeswitch($node) + case processing-instruction() return ($node) + case text() return ( + let $norm := string:escape-and-normalize($node, '🦄') + return + replace($norm, '🦄', ' ') + ) + + case comment() return ($node) + + case element(lem) return ( + if ( (count($node/node()) < 1) and 
$node[not(@type)]) then (
+                (: empty lem without a type: mark it as an omission :)
+                element {"lem"}{
+                    $node/@*[not(name(.) eq "id")],
+                    attribute {"type"}{"om"}
+                }
+            ) else (
+                element {name($node)}{
+                    $node/@*[not(name(.) eq "id")],
+                    interform:transform-map($node/node(), $map)
+                }
+            )
+        )
+
+        case element(textNode) return (
+            (: the textNode wrapper itself is not copied, only its content and marks :)
+            let $marks := interform:get-marks($node, $map)
+            return (
+                $marks[self::open]/node(),
+                interform:transform-map($node/node(), $map),
+                $marks[self::close]/node()
+            )
+        )
+
+        default return (
+            let $marks := interform:get-marks($node, $map)
+            return (
+                $marks[self::open]/node(),
+                element {name($node)}{
+                    (: only an rdg keeps its "id" attribute :)
+                    if ( $node[ not(self::rdg) ] ) then ($node/@*[not(name(.) eq "id")]) else ($node/@*),
+                    interform:transform-map($node/node(), $map)
+                },
+                $marks[self::close]/node()
+            )
+        )
+};
+(: **************************************************************************************************************
+ : APP-Index Conversion
+ : ************************************************************************************************************** :)
+
+(:~
+ : interform:app-index()
+ : Builds an index of all app elements found below the given nodes. Each app becomes an "entry"
+ : element numbered by its position (@n) with a "childs" element holding the result of
+ : interform:index-app-child() for every child node of the app.
+ :
+ : @param $nodes the preprocessed nodes
+ : @return an "index" element containing one "entry" per app
+ :)
+declare function interform:app-index
+    ($nodes as node()*) as item()* {
+
+    let $apps := $nodes//app
+    let $index := (
+        for $node at $nr in $apps
+        let $childs := $node/node()
+        let $entry := (
+
+            element {"entry"}{
+                attribute {"n"}{$nr},
+                (:element{"COMPARE"}{$node},:)
+                element{"childs"} {
+                    for $child in $childs
+                    return (
+                        interform:index-app-child($child, ($interform:blockLevelElements, $interform:appElements), $nr)
+                    )
+                }
+            }
+        )
+        return
+            $entry
+    )
+
+    return <index>{$index}</index>
+
+};
+
+
+(:~
+ : interform:index-app-child()
+ : Builds the index record of one child of an app: an element named like the child that carries
+ : the child's attributes, a "position" element and a "markers" element.
+ : - "position" holds a "first" and a "last" element for the nodes returned by
+ :   interform:first-or-last-save-node(); each of them has the @id of that node as @target,
+ :   its name as @name and a copy of the node as content
+ : - "markers" carries the app number as @index; for a lem it lists one empty copy of every
+ :   following sibling with context="lem" and their number as @count, for an rdg it lists
+ :   one empty copy of the rdg itself with context="rdg"
+ :
+ : NOTE(review): $node is declared as node()* but name($node) only accepts a single node.
+ :
+ : @param $node the child of the app
+ : @param $sequence the element names handed on to interform:first-or-last-save-node()
+ : @param $app-index the number of the app entry
+ : @return the index record
+ :)
+declare function interform:index-app-child
+    ($node as node()*, $sequence as item()*, $app-index as xs:integer) as item()* {
+
+    element {name($node)}{
+        $node/@*,
+        element {"position"}{
+            (: both lookups search all descendants of the child :)
+            let $first := interform:first-or-last-save-node("first", $node//node(), ($sequence))
+            let $last := interform:first-or-last-save-node("last", $node//node(), ($sequence))
+            return (
+                if ($first) then (
+                    element{"first"}{
+                        attribute {"target"}{$first/string(@id)},
+
attribute {"name"}{name($first)},
+                        $first
+                    }
+                ) else (),
+                if ($last) then (
+                    element{"last"}{
+                        attribute {"target"}{$last/string(@id)},
+                        attribute {"name"}{name($last)},
+                        $last
+                    }
+                ) else ()
+            )
+        },
+        element {"markers"}{
+            attribute {"index"}{$app-index},
+            (: a lem lists every node that follows it inside the app, each flagged with context="lem" :)
+            if ($node[self::lem]) then (
+                attribute {"count"}{count($node/following-sibling::node())},
+                for $sibling in $node/following-sibling::node()
+                return(
+                    element {name($sibling)} {
+                        $sibling/@*,
+                        attribute {"context"}{"lem"}
+                    }
+                )
+            )
+            (: an rdg only lists itself, flagged with context="rdg" :)
+            else if ($node[self::rdg]) then (
+                element {name($node)} {
+                    $node/@*,
+                    attribute {"context"}{"rdg"}
+                }
+            )
+            else ()
+        }
+
+    }
+};
+
+
+(:~
+ : interform:first-or-last-save-node
+ : This function identifies the first or last safe node() with regard to a given sequence of element names.
+ : Text nodes and all nodes that are named in the sequence, or contain a node named in the sequence,
+ : are ignored. When the first or last node is identified it bubbles the tree up to determine the
+ : first uncritical ancestor (see interform:bubble-sequence).
+ :
+ : NOTE: works
+ :
+ : @param $position "first" or "last"; any other value yields the empty sequence
+ : @param $nodes the nodes to search
+ : @param $sequence the element names regarded as critical
+ : @return the result of interform:bubble-sequence() for the selected node
+ :
+ : @version 1.0 (2017-11-15)
+ : @author Uwe Sikora
+ :)
+declare function interform:first-or-last-save-node
+    ($position as xs:string, $nodes as node()*, $sequence as item()*) {
+
+    let $node-set := $nodes
+        [not( self::text() )]
+        [not( interform:are-nodes-in-sequence(descendant-or-self::node(), $sequence) )]
+
+    let $target := (
+        if ($position eq "first") then (
+            let $first := functx:first-node($node-set)
+            return
+                interform:bubble-sequence($first, $sequence)
+        )
+        else if ($position eq "last") then (
+            let $last := functx:last-node($node-set)
+            (: let $ancestor := $last/ancestor-or-self::node()[ not (interform:are-nodes-in-sequence(descendant-or-self::node(), $sequence)) ]:)
+            return
+                interform:bubble-sequence($last, $sequence)
+        ) else ()
+    )
+
+    return (
+        $target
+    )
+};
+
+
+(:~
+ : interform:bubble-sequence
+ : This function bubbles up from a given node to identify the nearest uncritical ancestor with regard to a given sequence of node-names.
+ : If the name of the given node's parent is already part of the sequence, the node itself is the
+ : safe node we are looking for and is returned unchanged. Otherwise the nearest ancestor whose
+ : parent's name is part of the sequence is returned.
+ :
+ : @param $node the node to start from
+ : @param $sequence the names the parent names are tested against
+ : @return $node itself, the nearest matching ancestor or the empty sequence
+ :
+ : @version 1.0 (2017-11-15)
+ : @author Uwe Sikora
+ :)
+declare function interform:bubble-sequence
+    ($node as node()?, $sequence) as item()* {
+
+    let $parent-is-listed := functx:is-value-in-sequence(name($node/parent::node()), $sequence)
+    return
+        if ($parent-is-listed) then $node
+        else $node/ancestor::node()[ functx:is-value-in-sequence(name(parent::node()), $sequence) ][1]
+};
+
+
+(:~
+ : interform:are-nodes-in-sequence()
+ : Tells whether at least one of the given nodes has a name that is listed in the given sequence.
+ :
+ : @param $nodes the nodes() whose names are tested
+ : @param $sequence the list of names to test against
+ : @return xs:boolean ('true' if any name matches, else 'false')
+ :
+ : @version 1.1 (2017-09-22)
+ : @status working
+ : @author Uwe Sikora
+ :)
+declare function interform:are-nodes-in-sequence
+    ($nodes as node()*, $sequence as item()*) as xs:boolean{
+
+    some $candidate in $nodes
+    satisfies functx:is-value-in-sequence(name($candidate), $sequence)
+};
+
+(: **************************************************************************************************************
+ : Target Mapping Conversion
+ : ************************************************************************************************************** :)
+
+(:~
+ : interform:marker-targets()
+ : Turns the app index into a map keyed by target id. All "first" and "last" elements of the index
+ : are grouped by their @target; for each id a "target" element is stored that holds the id, the
+ : content of the first matching index element ("targetNode") and the "open" and "close" marker sets.
+ :
+ : @param $app-index the index returned by interform:app-index()
+ : @return the map of target ids to "target" elements
+ :)
+declare function interform:marker-targets
+    ($app-index) {
+
+    let $targets := $app-index//node()[self::first or self::last]
+    let $ids := distinct-values( $targets/string(@target) )
+    let $map := map:merge(
+        for $id in $ids
+        let $targets-by-id := $targets[@target eq
$id]
+        return
+            map:entry($id ,
+                element {"target"} {
+                    attribute {"id"}{$id},
+                    (:element {"COMPARE"}{
+                        $targets-by-id/ancestor::node()[self::lem or self::rdg]/parent::node()/parent::node()
+                    },:)
+                    element {"targetNode"}{
+                        $targets-by-id[1]/node()
+                    },
+                    element {"markers"}{
+(:                        element {"open"}{interform:first-marker-set($id, $app-index)},:)
+(:                        element {"close"}{interform:last-marker-set($id, $app-index)}:)
+                        element {"open"}{interform:create-marker-sets($targets-by-id[self::first], "open")},
+                        element {"close"}{ reverse(interform:create-marker-sets($targets-by-id[self::last], "close")) }
+                    }
+                }
+            )
+    )
+
+    return
+        ($map)
+
+};
+
+
+(:~
+ : interform:create-marker-sets
+ : This function creates marker sets for each given target. The input needs to be the last- or first-nodes().
+ : For each of them the markers recorded next to its "position" element are collected (markers
+ : with type "v" are skipped), the single readings are merged and rdgMarkers are built.
+ : The sets are returned in the numeric order of the app entries they belong to.
+ :
+ : @param $marker-set the nodes() representing a set of Markers
+ : @param $marker-type the type of the marker ("open" or "close")
+ : @return set of element("rdgMarker")*
+ :
+ : @version 1.1 (2017-09-22)
+ : @status working
+ : @author Uwe Sikora
+ :)
+declare function interform:create-marker-sets
+    ( $marker-set as node()* , $marker-type as xs:string) as item()* {
+
+    let $targets := (
+        for $item in $marker-set
+        (: the entry number has to be compared as a number: as a string "10" would sort before "2"
+           and the nesting of markers from different apps would break :)
+        let $entry-index := xs:integer($item/ancestor::entry/@n)
+        let $markers := $item/parent::position/following-sibling::markers/node()
+        let $merged := interform:merge-readings($markers[not(@type eq "v")])
+        order by $entry-index ascending
+        return
+            interform:build-markers($marker-type, $merged)
+    )
+
+    return $targets
+};
+
+
+(:~
+ : interform:build-markers()
+ : constructs rdgMarker elements from set of tei:rdg nodes
+ :
+ : @param $type The type of the marker element
+ : @param $nodes A set of tei:rdg elements
+ : @return rdgMarker element()s for each rdg in the set
+ :
+ : @version 1.1 (2017-09-13)
+ : @author Uwe Sikora
+ :)
+declare function interform:build-markers
+    ($type as
xs:string, $nodes as node()*) as item()* {
+
+    $nodes ! interform:marker('rdgMarker', $type, .)
+};
+
+
+(:~
+ : interform:marker() - Marker Constructor
+ : Builds one marker element. Its attributes are taken from the marked node: @wit (with every
+ : '#' removed), @type, @ref (the node's @id) and @context; @mark holds the given mark type.
+ :
+ : @param $name The name of the marker element
+ : @param $mark The mark type e.g. open or close
+ : @param $rdg_node The node which is marked
+ : @return element() the marker element
+ :
+ : @version 1.1 (2017-09-13)
+ : @author Uwe Sikora
+ :)
+declare function interform:marker
+    ($name as xs:string, $mark as xs:string, $rdg_node as node()) as element(){
+
+    element {$name} {
+        attribute wit {replace(data($rdg_node/@wit), '#', '')},
+        attribute type {data($rdg_node/@type)},
+        attribute ref {data($rdg_node/@id)},
+        attribute mark {$mark},
+        attribute context {$rdg_node/@context}
+    }
+};
+
+
+(:~
+ : interform:merge-readings()
+ : Merges all readings of the given set that share the same @type into one rdg element.
+ : A reading without @type is treated as type 'none'.
+ :
+ : @param $readings the readings as a sequence
+ : @return one merged rdg element per distinct type
+ :
+ : @version 1.0 (2017-09-14)
+ : @author Uwe Sikora
+ :)
+declare function interform:merge-readings
+    ($readings as node()*) as item()* {
+
+    let $targets := (
+        for $reading in $readings
+        return
+            if (exists($reading/@type)) then $reading
+            else
+                element { name($reading) } {
+                    $reading/@*,
+                    attribute type {'none'}
+                }
+    )
+
+    return (
+        for $type in distinct-values($targets/@type)
+        let $rdgs := $targets[@type = $type]
+        return
+            element {"rdg"}{
+                attribute wit {$rdgs/@wit},
+                attribute id {$rdgs/@id},
+                attribute context {distinct-values($rdgs/@context)},
+
attribute type {$type}
+            }
+    )
+};
+
+(:~
+ : interform:get-marks()
+ : Looks up the marks registered for the given node. If the node has an @id value and $map
+ : contains an entry for it, the "open" and "close" elements found below the entry's "markers"
+ : element are returned (open first). In every other case the empty sequence is returned.
+ :
+ : @param $node the node whose marks are requested
+ : @param $map the map returned by interform:marker-targets()
+ : @return the "open" and "close" elements or the empty sequence
+ :)
+declare function interform:get-marks
+    ($node as node(), $map) as item()* {
+
+    if (data($node/@id) and map:contains( $map, data($node/@id)) ) then (
+        let $map-item := $map(data($node/@id))
+        let $open-marks := $map-item/*:markers/*:open
+        let $close-marks := $map-item/*:markers/*:close
+
+        return (
+            $open-marks,
+            $close-marks
+        )
+    ) else ()
+};
\ No newline at end of file
diff --git a/string.xqm b/stable/modules/string.xqm
similarity index 91%
rename from string.xqm
rename to stable/modules/string.xqm
index 61c499f..6fa96f5 100644
--- a/string.xqm
+++ b/stable/modules/string.xqm
@@ -1,5 +1,6 @@
-xquery version "1.0";
-module namespace string="http://www.arokis.com/xquery/libs/string";
+xquery version "3.1";
+
+module namespace string="xmldb:exist:///db/apps/bdn/modules/bdn_string.xqm";
 
 
 (:~
diff --git a/stable/rest/intermediate_format.xql b/stable/rest/intermediate_format.xql
new file mode 100644
index 0000000..a322fc9
--- /dev/null
+++ b/stable/rest/intermediate_format.xql
@@ -0,0 +1,16 @@
+xquery version "3.1";
+
+
+declare namespace tei = "http://www.tei-c.org/ns/1.0";
+
+import module namespace functx="http://www.functx.com";
+import module namespace interform = "xmldb:exist:///db/apps/bdn/modules/intermediate_format/inter_form.xqm" at "xmldb:exist:///db/apps/bdn/modules/intermediate_format/inter_form.xqm";
+
+(: http://localhost:8080/exist/rest/apps/bdn/rest/intermediate_format.xql :)
+declare variable $doc-path := request:get-parameter("path", ()); (: NOTE(review): empty sequence when the "path" parameter is missing :)
+
+let $doc := doc($doc-path)
+
+return (
+    interform:build-intermediate-format($doc//tei:TEI)
+)
\ No newline at end of file
-- 
GitLab