xquery version "3.0";

(:
 : Copyright (c) 2018 Uwe Sikora
 : Copyright (c) 2018–2019 Michelle Weidling
 : Copyright (c) 2020 Stefan Hynek
 :
 : This file is part of intermediate-format.
 :
 : intermediate-format is free software: you can redistribute it and/or modify
 : it under the terms of the GNU General Public License as published by
 : the Free Software Foundation, either version 3 of the License, or
 : (at your option) any later version.
 :
 : intermediate-format is distributed in the hope that it will be useful,
 : but WITHOUT ANY WARRANTY; without even the implied warranty of
 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 : GNU General Public License for more details.
 :
 : You should have received a copy of the GNU General Public License
 : along with intermediate-format. If not, see <https://www.gnu.org/licenses/>.
 :)

(:~
 : IDENTIFICATION Module ("ident", "http://bdn-edition.de/intermediate_format/identification")
 : *******************************************************************************************
 : This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements.
 : The problem it solves is to identify non-blocklevel elements that do not themselves contain
 : blocklevel elements on their first or last descendant path, in order to set the text-critical
 : markers required in the printed version of a BdN digital edition.
 :
 : The basic idea is to construct a left- and a right-branch AXIS for reading nodes (tei:lem and tei:rdg).
 : Each axis describes a safe ("save") path of non-blocklevel nodes (non-BLE) that do not themselves
 : include BLEs on their own left- or right-branch AXIS down the tree.
:
 : It includes the helping module "markerset" holding helper functions to collect and construct reading markers
 : @version 2.0 (2018-01-29)
 : @note This new versions identification algorithm is more flexible and much more configurable as in the old version 1
 : @status working
 : @author Uwe Sikora
 :)
module namespace ident="http://bdn-edition.de/intermediate_format/identification";

import module namespace markerset = "http://bdn-edition.de/intermediate_format/markerset" at "markerset.xqm";

declare default element namespace "http://www.tei-c.org/ns/1.0";


(:############################# Modules Variables #############################:)

(:~
 : ident:blocklevel-elements
 : Names of the elements that are treated as blocklevel elements (BLE).
 :
 : @version 2.0 (2018-01-29)
 : @author Uwe Sikora
 :)
declare variable $ident:blocklevel-elements := (
    'titlePage', 'titlePart', 'aligned', 'div', 'list', 'item',
    'table', 'row', 'cell', 'head', 'p', 'note'
);

(:~
 : ident:apparatus
 : Name of the apparatus element.
 :
 : @version 1.0 (2018-02-05)
 : @author Uwe Sikora
 :)
declare variable $ident:apparatus := ('app');

(:~
 : ident:apparatus-readings
 : Names of the reading elements that are children of an apparatus element.
 :
 : @version 1.0 (2018-02-05)
 : @author Uwe Sikora
 :)
declare variable $ident:apparatus-readings := ('lem', 'rdg');


(:############################# Modules Functions #############################:)

(:~
 : ident:in-sequence()
 : Tests whether at least one of the given atomic values occurs in a sequence of atomic values.
 :
 : @param $values the atomic values to look for
 : @param $sequence the atomic values to search in
 : @return true() if any item of $values equals any item of $sequence, otherwise false()
 :         (also false() when either argument is empty)
 :
 : @version 2.0 (2018-01-29)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:in-sequence(
    $values as xs:anyAtomicType*,
    $sequence as xs:anyAtomicType*
) as xs:boolean {
    (: existential test, spelled out: one matching pair is enough :)
    some $value in $values satisfies $value = $sequence
};

(:~
 : ident:is-or-are-ble()
 : This function checks if nodes are Blocklevelelements
 :
 : @param $values the nodes to check against the sequence
 : @return
xs:boolean ('true' else 'false')
 :
 : @version 2.0 (2018-01-29)
 : @note derived function from ident:in-sequence
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:is-or-are-ble(
    $values as xs:anyAtomicType*
) as xs:boolean {
    (: true as soon as one of the given names is listed in $ident:blocklevel-elements :)
    ident:in-sequence($values, $ident:blocklevel-elements)
};

(:~
 : ident:left-branch-axis()
 : Recursively collects the pseudo axis "LEFT-BRANCH AXIS" of a node: starting below $node,
 : it repeatedly steps to the first child node and returns every node visited, top-down.
 :
 : When the first child is a tei:app, the step is decided as follows:
 :   - the tei:app has a tei:lem with content: the tei:app itself is the next node
 :   - otherwise, if the tei:app has a tei:rdg of type "ppl" or "ptl": the next node is the
 :     first child node of the first such tei:rdg (the tei:rdg itself is skipped)
 :   - otherwise, if the tei:app has a tei:rdg of type "pp" or "pt": the tei:app itself is the next node
 :   - otherwise the axis ends
 :
 : @param $node the node whose left branch is collected; the node itself is not part of the result
 : @return the nodes on the axis in top-down order, empty if there is no first child
 :
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:left-branch-axis(
    $node as node()?
) as node()* {
    let $candidate := $node/child::node()[1]
    let $next :=
        (: default route: anything that is not a tei:app, or a tei:app whose tei:lem has content :)
        if ( empty($candidate/self::app) or exists($candidate/child::lem/node()) ) then
            $candidate
        else
            (: tei:app without lemma content: reroute via its readings :)
            let $line-readings := $candidate/child::rdg[@type eq "ppl" or @type eq "ptl"]
            return
                if ( exists($line-readings) ) then
                    $line-readings[1]/node()[1]
                else if ( exists($candidate/child::rdg[@type eq "pp" or @type eq "pt"]) ) then
                    $candidate
                else
                    ()
    return
        if ( exists($next) ) then
            ( $next, ident:left-branch-axis($next) )
        else
            ()
};

(:~
 : ident:right-branch-axis()
 : This recursive function describes a pseudo axis "RIGHT-BRANCH AXIS" of a given node.
The right-branch axis
 : incorporates all last() nodes of a subtree (aka the right branch) represented by a node and its descendants.
 : In case one of the nodes on this axis is itself a tei:app, the step is decided by the last child node of that tei:app:
 :   - last child is a tei:rdg of type "ppl" or "ptl": the next node is the last child node of that tei:rdg
 :     (the tei:rdg itself is skipped)
 :   - last child is a tei:rdg of type "pp" or "pt": the tei:app itself is the next node
 :   - otherwise, if the tei:lem of the tei:app has content: the next node is the last child node of the tei:lem
 :   - otherwise the tei:app itself is the next node
 :
 : @param $node the node whose right branch is collected; the node itself is not part of the result
 : @return the nodes on the axis in top-down order, empty if there is no last child
 :
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:right-branch-axis(
    $node as node()?
) as node()* {
    let $candidate := $node/child::node()[last()]
    (: last child node of the candidate, but only if the candidate is a tei:app :)
    let $closing-child := $candidate/self::app/child::node()[last()]
    let $next :=
        if ( empty($candidate/self::app) ) then
            (: no tei:app: stay on the normal axis :)
            $candidate
        else if ( $closing-child/self::rdg[@type eq "ppl" or @type eq "ptl"] ) then
            $closing-child/child::node()[last()]
        else if ( $closing-child/self::rdg[@type eq "pp" or @type eq "pt"] ) then
            $candidate
        else if ( exists($candidate/child::lem/node()) ) then
            $candidate/lem/child::node()[last()]
        else
            $candidate
    return
        if ( exists($next) ) then
            ( $next, ident:right-branch-axis($next) )
        else
            ()
};

(:~
 : ident:first-save-node()
 : This function identifies the first-save node for a given node()
 :
 : @param $node the node of which the first save node should be identified
 : @return the first save node of a defined set of save nodes
 :
 : @version 2.0
(2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:first-save-node(
    $node as node()
) as node()* {
    ident:first-save-on-axis( ident:left-branch-axis($node) )
};

(:~
 : ident:last-save-node()
 : Identifies the last save node of a given node: the topmost node on the right-branch axis
 : that is no blocklevel element and has no blocklevel element below it on that axis.
 :
 : @param $node the node of which the last save node should be identified
 : @return the last save node, or the empty sequence if the axis is empty or ends in a blocklevel element
 :
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:last-save-node(
    $node as node()
) as node()* {
    ident:first-save-on-axis( ident:right-branch-axis($node) )
};

(:~
 : ident:first-save-on-axis()
 : Helper for ident:first-save-node() and ident:last-save-node().
 : Returns the topmost node of a branch axis that is not a blocklevel element and is not
 : followed on the axis by a blocklevel element.
 :
 : Both axis functions build their result as (next node, axis of next node), so the axis of any
 : node on the axis is exactly the remainder of the axis after it. The wanted node is therefore
 : the one directly after the last blocklevel element on the axis, and the axis has to be
 : computed only once instead of once per node.
 :
 : @param $axis a branch axis in top-down order
 : @return the save node, or the empty sequence if the axis is empty or ends in a blocklevel element
 :)
declare %private function ident:first-save-on-axis(
    $axis as node()*
) as node()? {
    let $ble-positions :=
        for $candidate at $position in $axis
        where ident:is-or-are-ble( name($candidate) )
        return $position
    (: 0 when the axis holds no blocklevel element at all, so the first axis node is selected :)
    let $last-ble-position := ( $ble-positions[last()], 0 )[1]
    let $save-position := $last-ble-position + 1
    return $axis[$save-position]
};

(:~
 : ident:identify-targets()
 : This function identifies the first and last save node for a given reading (tei:lem and tei:rdg)
 : It also collects the sibling readings as shortcuts (name and attributes) to build a set
 : of reading markers for opening and closing Markers
 :
 : @param $node the reading node going to be evaluated
 : @return evaluation report for the node according to the following form
 :            - element "rdg" or "lem" incl. copied attributes
 :            - element "target"[@type = "open"] incl. @gid (generate-id() of the first save node)
 :            - element "target"[@type = "close"] incl. @gid (generate-id() of the last save node)
 :            - element "marker"[@type = "open"] incl. the constructed opening marker content
 :            - element "marker"[@type = "close"] incl.
the constructed closing marker content (in reversed order)
 :
 : @note if no first or last save node exists, the corresponding @gid is the zero-length string
 :       returned by generate-id() for an empty argument and therefore never matches a node
 :
 : @version 2.0 (2018-01-31)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:identify-targets(
    $node as node()
) as node()* {
    let $first := ident:first-save-node($node)
    let $last := ident:last-save-node($node)
    let $marker-set := markerset:collect-markers($node)
    return
        (: node-name() keeps the expanded QName of the reading instead of re-resolving a lexical name :)
        element { node-name($node) } {
            $node/@*,
            element {"target"} {
                attribute {"type"} { "open" },
                attribute {"gid"} { generate-id($first) }
            },
            element {"target"} {
                attribute {"type"} { "close" },
                attribute {"gid"} { generate-id($last) }
            },
            element {"marker"} {
                attribute {"type"} { "open" },
                markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
            },
            element {"marker"} {
                attribute {"type"} { "close" },
                reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
            }
        }
};

(:~
 : ident:walk()
 : This recursive function represents the main conversion which adds the reading markers
 : for tei:lem and tei:rdg nodes.
 :
 : Per node:
 :   - processing instructions, comments and whitespace-only text nodes are dropped
 :   - tei:teiHeader is returned unchanged
 :   - tei:lem and tei:rdg add their evaluation report to the reading sequence before being
 :     rebuilt, unless their parent tei:app has @type "structural-variance" or, for tei:rdg,
 :     the @type is "v", "typo-correction" or "varying-target"
 :   - everything else is handed to ident:mark-node() with the unchanged reading sequence
 :
 : NOTE(review): an earlier comment named "typo_corr" and "invisible-ref" as excluded tei:rdg types.
 : The code tests "typo-correction" and does not test "invisible-ref"; confirm which is intended.
 : NOTE(review): the branch axes select first and last child nodes without filtering, so a comment,
 : processing instruction or whitespace-only text node can become a target. Such nodes are dropped
 : here and their markers would never be emitted; presumably the input is normalized beforehand.
 :
 : @param $nodes nodes to be converted
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return converted nodes
 :
 : @version 2.2 (2018-02-21)
 : @status working
 : @author Uwe Sikora, Michelle Rodzis
 :)
declare function ident:walk(
    $nodes as node()*,
    $reading-sequence as item()*
) as item()* {
    for $node in $nodes
    return
        typeswitch ($node)
            case processing-instruction() return ()
            case comment() return ()
            case text() return
                if ( normalize-space($node) eq "" ) then
                    ()
                else
                    ident:mark-node($node, $reading-sequence)
            case element(teiHeader) return
                $node
            case element(rdg) return
                if ( $node/parent::app[ @type eq "structural-variance" ]
                     or $node[@type="v" or @type="typo-correction" or @type="varying-target"] ) then
                    (: reading needs no markers: keep the reading sequence as it is :)
                    ident:mark-node($node, $reading-sequence)
                else
                    ident:mark-node($node, ( $reading-sequence, ident:identify-targets($node) ))
            case element(lem) return
                if ( $node/parent::app[ @type eq "structural-variance" ] ) then
                    ident:mark-node($node, $reading-sequence)
                else
                    ident:mark-node($node, ( $reading-sequence, ident:identify-targets($node) ))
            default return
                ident:mark-node($node, $reading-sequence)
};

(:~
 : ident:mark-node()
 : Rebuilds a node and, if the node is an identified first or last save node of a reading in the
 : reading sequence, puts the opening markers before and the closing markers after it.
 :
 : Text nodes are returned as they are. Every other node is rebuilt as an element with the same
 : node name and attributes, its children being converted by ident:walk(). A node that is neither
 : a text node nor has a name is wrapped in an ERROR element.
 :
 : Opening markers are emitted in reading-sequence order, closing markers in reversed order.
 : Both are collected item by item, so the order does not depend on the implementation-defined
 : document order between separately constructed evaluation reports.
 :
 : @param $node node to be checked and in case of positive identification decorated with markers
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return the opening marker content, the converted node and the closing marker content
 :
 : @version 2.0 (2018-02-01)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:mark-node(
    $node as node(),
    $reading-sequence as item()*
) as node()* {
    (: empty when no report targets this node, in which case only the rebuilt node is returned :)
    let $markers := ident:fetch-marker-from-sequence( generate-id($node), $reading-sequence )
    let $open :=
        for $item in $markers[@type = "open"]
        return $item/node()
    let $close :=
        for $item in reverse( $markers[@type = "close"] )
        return $item/node()
    let $rebuilt :=
        if ( $node instance of text() ) then
            $node
        else if ( empty(node-name($node)) ) then
            <ERROR>{$node}</ERROR>
        else
            (: node-name() preserves namespace and prefix of the source element :)
            element { node-name($node) } {
                $node/@*,
                ident:walk($node/node(), $reading-sequence)
            }
    return ( $open, $rebuilt, $close )
};

(:~
 : ident:mark-text()
 : This function checks if a given text() is a identified first or last save node
 : and sets in case of positive identification sets opening and closing markers before
and after the node
 :
 : @param $nodes nodes to be checked and in case of positive identification decorated with markers
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return converted node()
 :
 : @version 2.0 (2018-02-01)
 : @status deprecated. integrated in ident:mark-node()
 : @author Uwe Sikora
 :)
(:declare function ident:mark-text
    ($node as node(), $reading-sequence as item()* ) as node()* {

    let $node-id := generate-id( $node )
    let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
    return
        if ($in-reading-sequence) then (
            let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
            let $open := $marker[@type = "open"]/node()
            let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
            return(
                $open,
                $node,
                $close
            )
        )
        else (
            $node
        )
};:)

(:~
 : ident:fetch-marker-from-sequence()
 : Helper that collects, from the evaluation reports in a reading sequence, the marker elements
 : that belong to a node id.
 :
 : For every report, the "target" children whose @gid equals the id are looked up. The report's
 : "marker" children with the same @type as one of these targets are returned. A report without
 : a matching target contributes nothing.
 :
 : @param $node-id id to be checked against the @gid of the targets in the reading sequence
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return the matching "marker" elements in reading-sequence order
 :
 : @version 2.0 (2018-02-01)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:fetch-marker-from-sequence(
    $node-id as xs:string,
    $reading-sequence as item()*
) as node()* {
    for $report in $reading-sequence
    (: "open", "close", both or nothing, depending on which targets of this report carry the id :)
    let $matched-types := $report/target[@gid = $node-id]/string(@type)
    return $report/marker[@type = $matched-types]
};