From 9dee0e5075254ea362c82299521913c01d5b955e Mon Sep 17 00:00:00 2001 From: uwe <arokis.u@gmail.com> Date: Sat, 3 Feb 2018 15:59:46 +0100 Subject: [PATCH] documented modules and functions --- .../intermediate_format/identification.xqm | 212 +++++++++++++++- modules/intermediate_format/markerset.xqm | 229 ++++-------------- modules/intermediate_format/preprocessing.xqm | 53 +++- .../whitespace-handling.xqm | 39 ++- 4 files changed, 333 insertions(+), 200 deletions(-) diff --git a/modules/intermediate_format/identification.xqm b/modules/intermediate_format/identification.xqm index ef9757f..8ff41e0 100644 --- a/modules/intermediate_format/identification.xqm +++ b/modules/intermediate_format/identification.xqm @@ -1,19 +1,73 @@ xquery version "3.0"; - +(:~ + : IDENTIFICATION Module ("ident", "http://bdn.edition.de/intermediate_format/identification") + : ******************************************************************************************* + : This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements. + : The problem it solves is to identify non-Blocklevel-elements that do not themselves contain Blocklevel-elements + : on their first- or last-descendants path, in order to set the text-critical markers required in the printed version of a + : BdN digital edition. + : + : The basic idea is to construct a kind of first- and last-descendants PATH for reading nodes (tei:lem and tei:rdg) describing + : a save PATH of non-BLEs that do not themselves include Blocklevel-elements on their own first- or last-descendants paths down the tree. 
+ : + : It includes the helper module "markerset" holding helper functions to collect and construct reading markers + + : @version 2.0 (2018-01-29) + : @note This new version's identification algorithm is more flexible and much more configurable than the old version 1 + : @status working + : @author Uwe Sikora + :) module namespace ident="http://bdn.edition.de/intermediate_format/identification"; import module namespace markerset = "http://bdn.edition.de/intermediate_format/markerset" at "markerset.xqm"; declare default element namespace "http://www.tei-c.org/ns/1.0"; +(:############################# Modules Variables #############################:) + +(:~ + : ident:blocklevel-elements + : Variable defining Blocklevelelements by name + : + : @version 2.0 (2018-01-29) + : @author Uwe Sikora + :) declare variable $ident:blocklevel-elements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note'); + +(:############################# Modules Functions #############################:) + +(:~ + : ident:in-sequence() + : This function checks whether any of the given atomic values is included in a sequence of atomic values + : + : @param $values the atomic values to check against the sequence + : @param $sequence a sequence of AtomicTypes + : @return xs:boolean ('true' else 'false') + : + : @version 2.0 (2018-01-29) + : @status working + : @author Uwe Sikora + :) declare function ident:in-sequence ( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType*) as xs:boolean { $values = $sequence }; + +(:~ + : ident:is-or-are-ble() + : This function checks if nodes are Blocklevelelements + : + : @param $values the nodes to check against the sequence + : @return xs:boolean ('true' else 'false') + : + : @version 2.0 (2018-01-29) + : @note derived function from ident:in-sequence + : @status working + : @author Uwe Sikora + :) declare function ident:is-or-are-ble ( $values as xs:anyAtomicType* ) as xs:boolean { @@ -21,8 +75,21 @@ declare function ident:is-or-are-ble }; 
+(:~ + : ident:first-descendants-path() + : This recursive function describes the so-called first-descendants PATH, + : that is the path of all save first nodes that (1) are not themselves BLEs and do not include + : BLEs and (2) comply with the defined parameters + : + : @param $node the node in the PATH from which all following nodes, and thus the path itself, are defined + : @return set of node() representing the PATH + : + : @version 2.0 (2018-01-30) + : @status working + : @author Uwe Sikora + :) declare function ident:first-descendants-path - ($node as node()?) as node()* { + ( $node as node()? ) as node()* { let $first-child := ( let $target := $node/child::node()[1] @@ -55,8 +122,21 @@ declare function ident:first-descendants-path }; +(:~ + : ident:last-descendants-path() + : This recursive function describes the so-called last-descendants PATH, + : that is the path of all save last nodes that (1) are not themselves BLEs and do not include + : BLEs and (2) comply with the defined parameters + : + : @param $node the node in the PATH from which all following nodes, and thus the path itself, are defined + : @return set of node() representing the PATH + : + : @version 2.0 (2018-01-30) + : @status working + : @author Uwe Sikora + :) declare function ident:last-descendants-path - ($node as node()?) as node()* { + ( $node as node()? 
) as node()* { let $last-child := ( let $target := $node/child::node()[last()] @@ -95,8 +175,19 @@ declare function ident:last-descendants-path }; +(:~ + : ident:first-save-node() + : This function identifies the first-save node for a given node() + : + : @param $node the node of which the first save node should be identified + : @return the first save node of a defined set of save nodes + : + : @version 2.0 (2018-01-30) + : @status working + : @author Uwe Sikora + :) declare function ident:first-save-node - ($node as node()) as node()* { + ( $node as node() ) as node()* { let $first := ident:first-descendants-path($node) [not ( ident:is-or-are-ble(self::node()/name()) )] @@ -106,8 +197,19 @@ declare function ident:first-save-node }; +(:~ + : ident:last-save-node() + : This function identifies the last-save node for a given node() + : + : @param $node the node of which the last save node should be identified + : @return the last save node of a defined set of save nodes + : + : @version 2.0 (2018-01-30) + : @status working + : @author Uwe Sikora + :) declare function ident:last-save-node - ($node as node()) as node()* { + ( $node as node() ) as node()* { let $last := ident:last-descendants-path($node) [not ( ident:is-or-are-ble(self::node()/name()) )] @@ -117,8 +219,26 @@ declare function ident:last-save-node }; +(:~ + : ident:identify-targets() + : This function identifies the first and last save node for a given reading (tei:lem or tei:rdg). + : It also collects the sibling readings as shortcuts (name and attributes) to build a set + : of reading markers for opening and closing markers + : + : @param $node the reading node going to be evaluated + : @return evaluation report for the node according to the following form + : - element "rdg" or "lem" incl. copied attributes + : - element "target"[@type = "open"] incl. @gid (generated) + : - element "target"[@type = "close"] incl. @gid (generated) + : - element "marker"[@type = "open"] incl. 
@id (generated) + : - element "marker"[@type = "close"] incl. @id (generated) + : + : @version 2.0 (2018-01-31) + : @status working + : @author Uwe Sikora + :) declare function ident:identify-targets - ($node as node()) as node()* { + ( $node as node() ) as node()* { let $first := ident:first-save-node($node) let $last := ident:last-save-node($node) @@ -130,13 +250,13 @@ declare function ident:identify-targets $node/@*, element {"target"}{ attribute {"type"}{ "open" }, - attribute {"gid"}{ generate-id($first) }, - $first + attribute {"gid"}{ generate-id($first) }(:, + $first:) }, element {"target"}{ attribute {"type"}{ "close" }, - attribute {"gid"}{ generate-id($last) }, - $last + attribute {"gid"}{ generate-id($last) }(:, + $last:) }, element {"marker"}{ attribute {"type"}{ "open" }, @@ -150,8 +270,21 @@ declare function ident:identify-targets }; +(:~ + : ident:walk() + : This recursive function represents the main conversion which adds the reading markers + : for tei:lem and tei:rdg nodes + : + : @param $nodes nodes to be converted + : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context + : @return converted node + : + : @version 2.0 (2018-02-01) + : @status working + : @author Uwe Sikora + :) declare function ident:walk - ($nodes as node()*, $reading-sequence as item()*) as item()* { + ( $nodes as node()*, $reading-sequence as item()* ) as item()* { for $node in $nodes return @@ -186,8 +319,22 @@ declare function ident:walk ) }; + +(:~ + : ident:mark-node() + : This function checks if a given node is an identified first or last save node + : and, in case of positive identification, sets opening and closing markers before and after the node + : + : @param $node node to be checked and, in case of positive identification, decorated with markers + : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context + : @return converted node() + : + : @version 
2.0 (2018-02-01) + : @status working + : @author Uwe Sikora + :) declare function ident:mark-node - ($node as node(), $reading-sequence as item()* ) as node()* { + ( $node as node(), $reading-sequence as item()* ) as node()* { let $node-id := generate-id( $node ) let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id] @@ -220,6 +367,20 @@ declare function ident:mark-node ) }; + +(:~ + : ident:mark-text() + : This function checks if a given text() is an identified first or last save node + : and, in case of positive identification, sets opening and closing markers before and after the node + : + : @param $node node to be checked and, in case of positive identification, decorated with markers + : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context + : @return converted node() + : + : @version 2.0 (2018-02-01) + : @status deprecated. integrated in ident:mark-node() + : @author Uwe Sikora + :) (:declare function ident:mark-text ($node as node(), $reading-sequence as item()* ) as node()* { @@ -237,6 +398,18 @@ declare function ident:mark-node };:) +(:~ + : ident:fetch-marker-from-sequence() + : Helper function to collect the reading markers from a given reading sequence + : + : @param $node-id id to be checked against the reading sequence's target ids + : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context + : @return reading markers as node()* for the node associated with node-id + : + : @version 2.0 (2018-02-01) + : @status working + : @author Uwe Sikora + :) declare function ident:fetch-marker-from-sequence ($node-id as xs:string, $reading-sequence as item()* ) as node()* { @@ -250,8 +423,21 @@ declare function ident:fetch-marker-from-sequence }; + +(:~ + : ident:identify-unit-test() + : A kind of unit-test function to evaluate the main identification functionality of this module on all tei:lem and tei:rdg elements of a given xml-tree + : 
+ : @param $nodes xml-tree to be tested + : @return test report for each tei:lem and tei:rdg as node()* + : + : @version 2.0 (2018-02-01) + : @status working + : @note meant to test the identification algorithm + : @author Uwe Sikora + :) declare function ident:identify-unit-test - ($nodes as node()*) as node()* { + ( $nodes as node()* ) as node()* { for $node at $nr in $nodes//node()[self::lem or self::rdg] let $identified-targets := ident:identify-targets($node) diff --git a/modules/intermediate_format/markerset.xqm b/modules/intermediate_format/markerset.xqm index 4453242..a257146 100644 --- a/modules/intermediate_format/markerset.xqm +++ b/modules/intermediate_format/markerset.xqm @@ -1,9 +1,31 @@ xquery version "3.0"; - +(:~ + : MARKERSET Module ("markerset", "http://bdn.edition.de/intermediate_format/markerset") + : ******************************************************************************************* + : This module is a helper module and defines functions to collect and construct reading markers + : + : @version 2.0 (2018-01-29) + : @status working + : @author Uwe Sikora + :) module namespace markerset="http://bdn.edition.de/intermediate_format/markerset"; declare default element namespace "http://www.tei-c.org/ns/1.0"; +(:############################# Modules Functions #############################:) + +(:~ + : markerset:collect-markers() + : This function collects markers for a given reading. + : It distinguishes between tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collects the reading itself. 
+ : + : @param $reading the reading node to collect readings for + : @return node() representing a markerset of readings for the given node + : + : @version 2.0 (2018-01-29) + : @status working + : @author Uwe Sikora + :) declare function markerset:collect-markers ( $reading as node()* ) as item() { @@ -34,6 +56,17 @@ declare function markerset:collect-markers }; +(:~ + : markerset:merge-markers() + : This function merges markers in a given set by the same type. It orders the merged markers according to an explicit ordering. + : + : @param $markerset node() including the markers that should be merged + : @return node()* representing the merged markerset + : + : @version 2.0 (2018-01-29) + : @status working + : @author Uwe Sikora + :) declare function markerset:merge-markers ( $markerset as node()* ) as item()* { @@ -59,8 +92,8 @@ declare function markerset:merge-markers (:~ - : interform:marker() - Marker Constructor - : Constructor function whch creates the marker element with name, mark-type and references + : markerset:marker() + : Constructor function which creates the marker element with name, mark-type and references : : @param $name The name of the marker element : @param $mark The mark type e.g. open or close @@ -84,187 +117,23 @@ declare function markerset:marker }; -declare function markerset:construct-marker-from-markerset - ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* { - - for $marker in $marker-set/node() - return ( - markerset:marker($name, $marker-type, $marker) - ) -}; - (:~ - : interform:are-nodes-in-sequence() - : This function checks if a node() from a given nodeset is or contains named Elements in a sequence. 
- : In this case it returns 'true' else 'false' + : markerset:construct-marker-from-markerset + : Helping function to construct markers for a sequence of markersets : - : @param $nodes the nodes() to check for BLEs - : @param $bleElements a list of defined BLEs - : @return xs:boolean ('true' else 'false') - : - : @version 1.1 (2017-09-22) - : @status working - : @author Uwe Sikora - -declare function interform:are-nodes-in-sequence - ($nodes as node()*, $sequence as item()*) as xs:boolean{ - - some $node in $nodes - satisfies - if(functx:is-value-in-sequence($node/name(), $sequence)) then( - fn:true() - ) - - else ( - fn:false() - ) -}; - :) - - -(: ************************************************************************************************************** - : Target Mapping Conversion - : ************************************************************************************************************** :) - -(:declare function interform:marker-targets - ($app-index) { - - let $targets := $app-index//node()[self::first or self::last] - let $ids := distinct-values( $targets/string(@target) ) - let $map := map:merge( - for $id in $ids - let $targets-by-id := $targets[@target eq $id] - return - map:entry($id , - element {"target"} { - attribute {"id"}{$id}, - (\:element {"COMPARE"}{ - $targets-by-id/ancestor::node()[self::lem or self::rdg]/parent::node()/parent::node() - },:\) - element {"targetNode"}{ - $targets-by-id[1]/node() - }, - element {"markers"}{ -(\: element {"open"}{interform:first-marker-set($id, $app-index)},:\) -(\: element {"close"}{interform:last-marker-set($id, $app-index)}:\) - element {"open"}{interform:create-marker-sets($targets-by-id[self::first], "open")}, - element {"close"}{ reverse(interform:create-marker-sets($targets-by-id[self::last], "close")) } - } - } - ) - ) - - return - ($map) - -};:) - - -(:~ - : interform:create-marker-sets - : This function creates marker sets for each given target. The input needs to be the last- or first-nodes(). 
- : Afterwards the single readings are merged for each set and rdgMarkers are build - : - : @param $marker-set the nodes() representing a set of Markers - : @param $marker-type the type of the marker ("open" or "close") - : @return set of element("rdgMarker")* - : - : @version 1.1 (2017-09-22) - : @status working - : @author Uwe Sikora - :) -(:declare function interform:create-marker-sets - ( $marker-set as node()* , $marker-type as xs:string) as item()* { - - let $targets := ( - for $item in $marker-set - let $entry-index := $item/ancestor::entry/string(@n) - let $markers := $item/parent::position/following-sibling::markers/node() - let $merged := interform:merge-readings($markers[not(@type eq "v")]) - order by $entry-index ascending - return - interform:build-markers($marker-type, $merged) - ) - - return $targets -};:) - - -(:~ - : interform:build-markers() - : constructs rdgMarker elements from set of tei:rdg nodes - : - : @param $type The type of the marker element - : @param nodes A set of tei:rdg elements - : @return rdgMarker element()s for each rdg in the set - : - : @version 1.1 (2017-09-13) - : @author Uwe Sikora - :) -(:declare function interform:build-markers - ($type as xs:string, $nodes as node()*) as item()* { - - for $node in $nodes - return - interform:marker('rdgMarker', $type, $node) -};:) - - - - - -(:~ - : interform:merge-readings() - : This function merges all readings in the given set sharing the same tei:rdg[@type] - : If no type was provided 'none' is set as type + : @param $name The name of the marker element + : @param $marker-type The mark type e.g. 
open or close + : @param $marker-set The markersets for which reading markers shall be constructed + : @return item()* representing the constructed rdgMarker sets : - : @param $readings the readings as a sequence - : @return $node the merged readings - : - : @version 1.0 (2017-09-14) + : @version 1.0 (2018-02-29) : @author Uwe Sikora :) -(:declare function interform:merge-readings - ($readings as node()*) as item()* { - - let $targets := ( - for $reading in $readings - return - if ($reading[@type]) then ( - $reading - ) - else ( - element { name($reading) } { - $reading/@*, - attribute type {'none'} - } - ) - ) +declare function markerset:construct-marker-from-markerset + ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* { - return ( - for $type in distinct-values($targets/@type) - let $rdgs := $targets[@type = $type] - return - element {"rdg"}{ - attribute wit {$rdgs/@wit}, - attribute id {$rdgs/@id}, - attribute context {distinct-values($rdgs/@context)}, - attribute type {$type} - } + for $marker in $marker-set/node() + return ( + markerset:marker($name, $marker-type, $marker) ) -};:) - -(:declare function interform:get-marks - ($node as node(), $map) as item()* { - - if (data($node/@id) and map:contains( $map, data($node/@id)) ) then ( - let $map-item := $map(data($node/@id)) - let $open-marks := $map-item/*:markers/*:open - let $close-marks := $map-item/*:markers/*:close - - return ( - $open-marks, - $close-marks - ) - ) else () -};:) \ No newline at end of file +}; \ No newline at end of file diff --git a/modules/intermediate_format/preprocessing.xqm b/modules/intermediate_format/preprocessing.xqm index 654491a..1e79955 100644 --- a/modules/intermediate_format/preprocessing.xqm +++ b/modules/intermediate_format/preprocessing.xqm @@ -1,11 +1,34 @@ xquery version "3.0"; - +(:~ + : PREPROCESSING Module ("pre", "http://bdn.edition.de/intermediate_format/preprocessing") + : 
******************************************************************************************* + : This module contains the preprocessing routines for the intermediate format + : + : It imports the whitespace handling helper module to perform whitespace handling during the preprocessing + + : @version 2.0 (2018-01-29) + : @status working + : @author Uwe Sikora + :) module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing"; import module namespace whitespace = "http://bdn.edition.de/intermediate_format/whitespace_handling" at "whitespace-handling.xqm"; declare default element namespace "http://www.tei-c.org/ns/1.0"; +(:############################# Modules Functions #############################:) + +(:~ + : pre:preprocessing-textNode + : preprocessing function which converts each text() into an xml-node "textNode". This function is an experimental fallback solution and not the main preprocessing routine! + : + : @param $nodes the nodes to be converted + : @return item()* representing the converted node + : + : @version 1.2 (2017-10-15) + : @status working + : @author Uwe Sikora + :) declare function pre:preprocessing-textNode ($nodes as node()*) as item()* { @@ -67,7 +90,20 @@ declare function pre:preprocessing-textNode }; -(: Would be great if $recursive-function would be a real function and not a node-sequence :) +(:~ + : pre:default-element + : function that serves as default element constructor for the preprocessing conversion. 
+ : It is more or less a copy function, copying the element's name and its node, and it recursively leads the conversion on to its child-nodes + : + : @param $node the node to be copied + : @param $recursive-function the recursive function as a kind of callback to the main conversion + : @return item()* representing the converted node + : + : @version 1.0 (2018-01-31) + : @note It would be great if $recursive-function were a real function and not a node-sequence (TO-DO) + : @status working + : @author Uwe Sikora + :) declare function pre:default-element ( $node as node(), $recursive-function as node()* ) as item()* { @@ -77,6 +113,18 @@ declare function pre:default-element } }; + +(:~ + : pre:preprocessing + : main preprocessing function. + : + : @param $nodes the nodes to be converted + : @return item()* representing the converted node + : + : @version 2.0 (2018-02-01) + : @status working + : @author Uwe Sikora + :) declare function pre:preprocessing ($nodes as node()*) as item()* { @@ -84,6 +132,7 @@ declare function pre:preprocessing return typeswitch($node) case processing-instruction() return () + case text() return ( whitespace:text($node, " ") ) diff --git a/modules/intermediate_format/whitespace-handling.xqm b/modules/intermediate_format/whitespace-handling.xqm index 0fc375f..66539a4 100644 --- a/modules/intermediate_format/whitespace-handling.xqm +++ b/modules/intermediate_format/whitespace-handling.xqm @@ -1,8 +1,31 @@ xquery version "3.0"; - +(:~ + : WHITESPACE Module ("whitespace", "http://bdn.edition.de/intermediate_format/whitespace_handling") + : ******************************************************************************************* + : This module contains the functions to handle different whitespace operations on text + : + : @version 1.0 (2018-01-02) + : @status working + : @author Uwe Sikora + :) module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling"; declare default element namespace 
"http://www.tei-c.org/ns/1.0"; + +(:############################# Modules Functions #############################:) + +(:~ + : whitespace:text() + : This function handles whitespace in defined text() nodes + : + : @param $text the text-node to be converted + : @param $escape-char a optional escape-character replacing all whitespace characters + : @return text()* representing the escaped text() + : + : @version 2.0 (2018-01-30) + : @status working + : @author Uwe Sikora + :) declare function whitespace:text ( $text as text()*, $escape-char as xs:string? ) as text()* { @@ -22,16 +45,22 @@ declare function whitespace:text else () }; + (:~ - : string:escape-whitespace + : whitespace:escape-text() : This function replaces whitespaces in a text() - : with one defined preservation character + : with a defined preservation character + : + : @param $text the text-node to be converted + : @param $escape the escape-character replacing all whitespace characters + : @return text()* representing the escaped text() : - : @version 1.0 (2017-09-14) + : @version 2.0 (2018-01-30) + : @status working : @author Uwe Sikora :) declare function whitespace:escape-text - ($text, $escape as xs:string) as text()* { + ( $text, $escape as xs:string ) as text()* { text {replace($text, '[\s]+', $escape)} }; -- GitLab