Skip to content
Snippets Groups Projects
identification.xqm 17.2 KiB
Newer Older
  • Learn to ignore specific revisions
  • xquery version "3.0";
    
    
    (:
     : Copyright (c) 2018 Uwe Sikora
     : Copyright (c) 2018–2019 Michelle Weidling
     : Copyright (c) 2020 Stefan Hynek
     :
     : This file is part of intermediate-format.
     :
     : intermediate-format is free software: you can redistribute it and/or modify
     : it under the terms of the GNU General Public License as published by
     : the Free Software Foundation, either version 3 of the License, or
     : (at your option) any later version.
     :
     : intermediate-format is distributed in the hope that it will be useful,
     : but WITHOUT ANY WARRANTY; without even the implied warranty of
     : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     : GNU General Public License for more details.
     :
     : You should have received a copy of the GNU General Public License
     : along with intermediate-format.  If not, see <https://www.gnu.org/licenses/>.
     :)
    
    
    uwe's avatar
    uwe committed
     : IDENTIFICATION Module ("ident", "http://bdn.edition.de/intermediate_format/identification")
     : *******************************************************************************************
     : This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements.
     : The problem it solves is to identify non-Blocklevel-elements self not containing Blocklevel-elements
     : on their first or last decendants path to set textcritical markers required in the printed version of a
     : BdN digital edition.
     :
    
     : The basic idea is constructing some kind of left- and right-branch AXIS for reading nodes (tei:lem and tei:rdg) describing
    
     : a save axis of non-Blocklevel nodes (non-BLE) self not including BLEs on their own left- or right-branch AXIS down the tree.
     :
    
    uwe's avatar
    uwe committed
     : It includes the helping module "markerset" holding helper functions to collect and construct reading markers
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-29)
     : @note This new versions identification algorithm is more flexible and much more configurable as in the old version 1
     : @status working
     : @author Uwe Sikora
     :)
    
    module namespace ident="http://bdn-edition.de/intermediate_format/identification";
    import module namespace markerset = "http://bdn-edition.de/intermediate_format/markerset" at "markerset.xqm";
    
    
    declare default element namespace "http://www.tei-c.org/ns/1.0";
    
    
    
    uwe's avatar
    uwe committed
    (:############################# Modules Variables #############################:)
    
    
    uwe's avatar
    uwe committed
     : ident:blocklevel-elements
    
     : Variable defining Blocklevel Elements (BLE) by name
     :
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-29)
     : @author Uwe Sikora
     :)
    
    declare variable $ident:blocklevel-elements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note');
    
    
     : ident:apparatus
    
     : Variable defining Apparatus Elements by name
     :
    
     : @version 1.0 (2018-02-05)
     : @author Uwe Sikora
     :)
    declare variable $ident:apparatus := ('app');
    
    
     : ident:apparatus-readings
    
     : Variable defining Apparatus-Child Elements by name
     :
    
     : @version 1.0 (2018-02-05)
     : @author Uwe Sikora
     :)
    declare variable $ident:apparatus-readings := ('lem', 'rdg');
    
    uwe's avatar
    uwe committed
    
    (:############################# Modules Functions #############################:)
    
    
    uwe's avatar
    uwe committed
     : ident:in-sequence()
    
     : This function checks if nodes are includes in a sequence of nodes
    
    uwe's avatar
    uwe committed
     :
     : @param $values the nodes to check against the sequence
     : @param $sequence a sequence of AtomicTypes
     : @return xs:boolean ('true' else 'false')
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-29)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:in-sequence
    
        ( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType* ) as xs:boolean {
    
        $values = $sequence
    };
    
    
    uwe's avatar
    uwe committed
     : ident:is-or-are-ble()
    
     : This function checks if nodes are Blocklevelelements
    
    uwe's avatar
    uwe committed
     :
     : @param $values the nodes to check against the sequence
     : @return xs:boolean ('true' else 'false')
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-29)
     : @note derived function from ident:in-sequence
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:is-or-are-ble
        ( $values as xs:anyAtomicType* ) as xs:boolean {
    
        $values = $ident:blocklevel-elements
    };
    
    
    
     : ident:left-branch-axis()
     : This recursive function describes a pseudo axis "LEFT-BRANCH AXIS" of a given node. The left-branch axis
     : incorporates all first nodes of a subtree (aka the left branch) represented by a node and its descendants.
    
     : In case one of the nodes on this axis is self a tei:app the axis is rerouted from the tei:app downwards the branch
     : according to defined parameters.
    
     : @param $node the nodes on the AXIS from where all following nodes and thus the AXIS itself is defined
     : @return set of node() representing the AXIS
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-30)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:left-branch-axis
    
    uwe's avatar
    uwe committed
        ( $node as node()? ) as node()* {
    
        let $first-child := (
            let $target := $node/child::node()[1]
            return
    
                (: AXIS CONTROLL of the left-branch :)
    
    
                (: IN CASE there is an tei:app, be ready to change the axis to
    
                   the first tei:rdg[ppl, ptl] and its first child::node()! :)
                if ( $target[self::app] ) then (
    
                    (: If tei:app has an empty tei:lem change the axis to tei:lems last child() :)
    
                    if ( empty($target/child::lem/node()) ) then (
    
                        if ( $target/child::rdg[@type eq "ppl" or @type eq "ptl"] ) then (
                            $target/child::rdg[@type eq "ppl" or @type eq "ptl"][1]/node()[1]
    
                        else if ( $target/child::rdg[@type eq "pp" or @type eq "pt"] ) then (
                            $target
                        )
                        else ()
    
                    (: If tei:app has no empty tei:lem
    
                       follow the normal axis from tei:app :)
    
                (: If there is no tei:app proceed on normal axis by default :)
    
            if($first-child) then ($first-child, ident:left-branch-axis($first-child)) else ()
    
     : ident:right-branch-axis()
     : This recursive function describes a pseudo axis "RIGHT-BRANCH AXIS" of a given node. The right-branch axis
     : incorporates all last() nodes of a subtree (aka the right branch) represented by a node and its descendants.
    
     : In case one of the nodes on this axis is self a tei:app the axis is rerouted from the tei:app downwards the branch
    
     : according to defined parameters.
    
     : @param $node the nodes in the AXIS from where all following nodes and thus the AXIS itsel is defined
     : @return set of node() representing the AXIS
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-30)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:right-branch-axis
    
    uwe's avatar
    uwe committed
        ( $node as node()? ) as node()* {
    
        let $last-child := (
            let $target := $node/child::node()[last()]
            return
    
                (: AXIS CONTROLL for the right-branch :)
    
                (: IN CASE there is an tei:app, be ready to change the axis ! :)
    
                if ( $target[self::app] ) then (
    
    
                    (: If tei:apps last child is a tei:rdg[ppl, ptl] change the axis to this rdg and
    
                       its last child() :)
                    if ( $target/child::node()[last()][ self::rdg[@type eq "ppl" or @type eq "ptl"] ] ) then (
                        $target/child::node()[last()]/child::node()[last()]
                    )
    
                    (: If tei:apps last child is a tei:rdg[pp, pt] stop here and return the tei:app :)
                    else if ( $target/child::node()[last()][ self::rdg[@type eq "pp" or @type eq "pt"] ] ) then (
                        $target
                    )
    
    
                    (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is not empty
    
                       change the axis to tei:lems last child() :)
    
                    else if ( not(empty($target/child::lem/node())) ) then (
                        $target/lem/child::node()[last()]
    
                    )
    
                    (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is empty
    
                       follow the normal axis from tei:app :)
    
                (: If there is no tei:app proceed on normal axis by default :)
    
            if($last-child) then ($last-child, ident:right-branch-axis($last-child)) else ()
    
    uwe's avatar
    uwe committed
     : ident:first-save-node()
     : This function identifies the first-save node for a given node()
     :
     : @param $node the node of which the first save node should be identified
     : @return the first save node of a defined set of save nodes
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-30)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:first-save-node
    
    uwe's avatar
    uwe committed
        ( $node as node() ) as node()* {
    
        let $first := ident:left-branch-axis($node)
    
                      [not ( ident:is-or-are-ble(self::node()/name()) )]
    
                      [not ( ident:is-or-are-ble( ident:left-branch-axis(self::node())/name() ) )]
    
    
    uwe's avatar
    uwe committed
     : ident:last-save-node()
     : This function identifies the last-save node for a given node()
     :
     : @param $node the node of which the last save node should be identified
     : @return the last save node of a defined set of save nodes
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-30)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:last-save-node
    
    uwe's avatar
    uwe committed
        ( $node as node() ) as node()* {
    
        let $last := ident:right-branch-axis($node)
    
                     [not ( ident:is-or-are-ble(self::node()/name()) )]
    
                     [not ( ident:is-or-are-ble( ident:right-branch-axis(self::node())/name() ) )]
    
    
    uwe's avatar
    uwe committed
     : ident:identify-targets()
     : This function identifies the first and last save node for a given reading (tei:lem and tei:rdg)
     : It also collect the sibling readings as shortcuts (name and attributes) to build a set
    
     : of reading markers for opening and closing Markers
    
    uwe's avatar
    uwe committed
     :
     : @param $node the reading nodegoing to be evaluated
     : @return evaluation report for the node acording to the following form
     :  - element "rdg" or "lem" incl. copied attributes
     :      - element "target"[@type = "open"] incl. @id (generated)
     :      - element "target"[@type = "close"] incl. @id (generated)
     :      - element "marker"[@type = "open"] incl. @id (generated)
     :      - element "marker"[@type = "close"] incl. @id (generated)
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-01-31)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:identify-targets
    
    uwe's avatar
    uwe committed
        ( $node as node() ) as node()* {
    
    
        let $first := ident:first-save-node($node)
    
        let $last := ident:last-save-node($node)
        let $marker-set := markerset:collect-markers($node)
    
        return
            element {$node/name()}{
                $node/@*,
                element {"target"}{
                    attribute {"type"}{ "open" },
    
    uwe's avatar
    uwe committed
                    attribute {"gid"}{ generate-id($first) }(:,
                    $first:)
    
                },
                element {"target"}{
                    attribute {"type"}{ "close" },
    
    uwe's avatar
    uwe committed
                    attribute {"gid"}{ generate-id($last) }(:,
                    $last:)
    
                },
                element {"marker"}{
                    attribute {"type"}{ "open" },
                    markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
                },
                element {"marker"}{
                    attribute {"type"}{ "close" },
                    reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
                }
            }
    };
    
    
    
    uwe's avatar
    uwe committed
     : ident:walk()
     : This recursive function represents the main conversion which adds the reading markers
    
     : for tei:lem and tei:rdg nodes
    
    uwe's avatar
    uwe committed
     :
     : @param $nodes nodes to be converted
     : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
     : @return converted node
    
     : @version 2.2 (2018-02-21)
    
    uwe's avatar
    uwe committed
     : @status working
    
     : @author Uwe Sikora, Michelle Rodzis
    
    uwe's avatar
    uwe committed
     :)
    
    declare function ident:walk
    
    uwe's avatar
    uwe committed
        ( $nodes as node()*, $reading-sequence as item()* ) as item()* {
    
        for $node in $nodes
        return
            typeswitch($node)
                case processing-instruction() return ()
    
                case text() return (
                    if (normalize-space($node) eq "") then () else (
                        ident:mark-node($node, $reading-sequence)
                    )
                )
    
                case element(teiHeader) return ( $node )
    
                    considering all tei:rdg except structural-variances and textcritical tei:rdg[@type="v"]
    
                    Also ignore tei:rdg with types "typo_corr", "invisible-ref", "varying-target" - They don't need Markers
    
                case element(rdg) return (
    
                            $node/parent::app[ @type eq "structural-variance" ] or
                            $node[@type="v" or @type="typo-correction" or @type="varying-target"]
                            )
    
                        ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
    
                    ) else (
                        ident:mark-node($node, $reading-sequence)
                    )
                )
    
                (: considering all tei:lem except structural-variances :)
    
                case element(lem) return (
                    if ( not($node/parent::app[ @type eq "structural-variance" ]) ) then (
    
                        ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
    
                    ) else (
                        ident:mark-node($node, $reading-sequence)
                    )
                )
    
    
                default return (
                    ident:mark-node($node, $reading-sequence)
    
    uwe's avatar
    uwe committed
     : ident:mark-node()
     : This function checks if a given node is a identified first or last save node
     : and sets in case of positive identification sets opening and closing markers before and after the node
     :
     : @param $nodes nodes to be checked and in case of positive identification decorated with markers
     : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
     : @return converted node()
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-02-01)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:mark-node
    
    uwe's avatar
    uwe committed
        ( $node as node(), $reading-sequence as item()* ) as node()* {
    
        let $node-id := generate-id( $node )
        let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
    
            if ($in-reading-sequence) then (
                let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
                let $open := $marker[@type = "open"]/node()
                let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
                return(
                    $open,
                    if ( $node[ not(self::text()) ] ) then (
                        element{$node/name()}{
                            $node/@*,
                            ident:walk($node/node(), $reading-sequence)
                        }
                    ) else (
                        $node
    
                    $close
               )
            ) else (
                if ( $node[ not(self::text()) ] ) then (
    
                    if ($node[not(name())]) then (
                        <ERROR>{$node}</ERROR>
                    ) else (
                        element{$node/name()}{
                            $node/@*,
                            ident:walk($node/node(), $reading-sequence)
                        }
                    )
    
    uwe's avatar
    uwe committed
     : ident:mark-text()
     : This function checks if a given text() is a identified first or last save node
     : and sets in case of positive identification sets opening and closing markers before and after the node
     :
     : @param $nodes nodes to be checked and in case of positive identification decorated with markers
     : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
     : @return converted node()
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-02-01)
     : @status deprecated. integrated in ident:mark-node()
     : @author Uwe Sikora
     :)
    
    (:declare function ident:mark-text
        ($node as node(), $reading-sequence as item()* ) as node()* {
    
        let $node-id := generate-id( $node )
        let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
    
            if ($in-reading-sequence) then (
                let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
                let $open := $marker[@type = "open"]/node()
                let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
                return(
                    $open, $node, $close
               )
            ) else ( $node )
    };:)
    
    
    
    uwe's avatar
    uwe committed
     : ident:fetch-marker-from-sequence()
     : Helperfunction to collect the reading markers from a given reading sequence
     :
     : @param $node-id id to be checked against the reading-sequences target-ids
     : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
     : @return reading markers as node()* for the node associated with node-id
    
    uwe's avatar
    uwe committed
     : @version 2.0 (2018-02-01)
     : @status working
     : @author Uwe Sikora
     :)
    
    declare function ident:fetch-marker-from-sequence
    
        ( $node-id as xs:string, $reading-sequence as item()* ) as node()* {
    
        for $seq-item in $reading-sequence
        let $found := $seq-item/target[@gid = $node-id]
        let $found-type := $found/string(@type)
        let $markers := $seq-item/marker[@type = $found-type]
        where $found
        return
            $markers