Newer
Older
(:
: Copyright (c) 2018 Uwe Sikora
: Copyright (c) 2018–2019 Michelle Weidling
: Copyright (c) 2020 Stefan Hynek
:
: This file is part of intermediate-format.
:
: intermediate-format is free software: you can redistribute it and/or modify
: it under the terms of the GNU General Public License as published by
: the Free Software Foundation, either version 3 of the License, or
: (at your option) any later version.
:
: intermediate-format is distributed in the hope that it will be useful,
: but WITHOUT ANY WARRANTY; without even the implied warranty of
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
: GNU General Public License for more details.
:
: You should have received a copy of the GNU General Public License
: along with intermediate-format. If not, see <https://www.gnu.org/licenses/>.
:)
(:~
 : IDENTIFICATION Module ("ident", "http://bdn-edition.de/intermediate_format/identification")
 : *******************************************************************************************
 : This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements.
 : The problem it solves is to identify non-blocklevel elements that do not themselves contain blocklevel
 : elements on their first or last descendant path, so that the text-critical markers required in the
 : printed version of a BdN digital edition can be set.
 :
 : The basic idea is to construct a kind of left- and right-branch AXIS for reading nodes (tei:lem and tei:rdg)
 : describing a safe ("save") axis of non-blocklevel nodes (non-BLE) that do not themselves include BLEs on
 : their own left- or right-branch AXIS down the tree.
 :
 : It imports the helper module "markerset", which holds helper functions to collect and construct reading markers.
 : @version 2.0 (2018-01-29)
 : @note The identification algorithm of this version is more flexible and much more configurable than the one in version 1
 : @status working
 : @author Uwe Sikora
 :)
module namespace ident="http://bdn-edition.de/intermediate_format/identification";
import module namespace markerset = "http://bdn-edition.de/intermediate_format/markerset" at "markerset.xqm";
declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Variables #############################:)
(:~
 : Names of the elements that are treated as blocklevel elements (BLE).
 : ident:is-or-are-ble() compares element names against this list.
 :
 : @version 2.0 (2018-01-29)
 : @author Uwe Sikora
 :)
declare variable $ident:blocklevel-elements := (
    'titlePage', 'titlePart', 'aligned', 'div', 'list', 'item',
    'table', 'row', 'cell', 'head', 'p', 'note'
);
(:~
 : Names of the elements that are treated as apparatus elements.
 :
 : @version 1.0 (2018-02-05)
 : @author Uwe Sikora
 :)
declare variable $ident:apparatus := ('app');
(:~
 : Names of the elements that are treated as readings, i.e. as children of an apparatus element.
 :
 : @version 1.0 (2018-02-05)
 : @author Uwe Sikora
 :)
declare variable $ident:apparatus-readings := ('lem', 'rdg');
(:############################# Modules Functions #############################:)
(:~
 : Checks whether at least one of the given atomic values occurs in a sequence of atomic values.
 :
 : The check is an XQuery general comparison: it is true as soon as any item of $values
 : equals any item of $sequence, and false if either side is empty.
 :
 : @param $values the atomic values (e.g. element names) to look for
 : @param $sequence the sequence of atomic values to check against
 : @return xs:boolean (true if at least one value is contained in the sequence, false otherwise)
 : @version 2.0 (2018-01-29)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:in-sequence
    ( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType* ) as xs:boolean {
    $values = $sequence
};
(:~
 : Checks whether at least one of the given names is the name of a blocklevel element (BLE),
 : i.e. is listed in $ident:blocklevel-elements.
 :
 : The check is a general comparison, so a sequence of names yields true as soon as one of
 : them is a BLE name; an empty sequence yields false.
 :
 : @param $values the names to check against $ident:blocklevel-elements
 : @return xs:boolean (true if at least one name is a BLE name, false otherwise)
 : @version 2.0 (2018-01-29)
 : @note derived function from ident:in-sequence
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:is-or-are-ble
    ( $values as xs:anyAtomicType* ) as xs:boolean {
    $values = $ident:blocklevel-elements
};
(:~
 : ident:left-branch-axis()
 : This recursive function describes the pseudo axis "LEFT-BRANCH AXIS" of a given node. The left-branch axis
 : incorporates all first nodes of a subtree (aka the left branch) represented by a node and its descendants.
 : If the next node on this axis is a tei:app, the axis is rerouted as follows:
 :   - the tei:app has no tei:lem content and a tei:rdg child of type "ppl" or "ptl":
 :     continue with the first child node of the first such tei:rdg
 :   - the tei:app has no tei:lem content and a tei:rdg child of type "pp" or "pt":
 :     continue with the tei:app itself
 :   - the tei:app has no tei:lem content and none of these readings: the axis ends here
 :   - the tei:app has tei:lem content: continue with the tei:app itself
 :
 : @param $node the node on the AXIS from which all following nodes, and thus the AXIS itself, are determined
 : @return the nodes representing the AXIS in top-down order ($node itself is not included)
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:left-branch-axis
    ( $node as node() ) as node()* {
    let $first-child := (
        let $target := $node/child::node()[1]
        return
            (: AXIS CONTROL of the left branch :)
            (: IN CASE there is a tei:app, be ready to change the axis to
               the first tei:rdg[ppl, ptl] and its first child::node()! :)
            if ( $target[self::app] ) then (
                (: If tei:app has an empty (or no) tei:lem, change the axis to the first child
                   of the first tei:rdg[ppl, ptl] :)
                if ( empty($target/child::lem/node()) ) then (
                    if ( $target/child::rdg[@type eq "ppl" or @type eq "ptl"] ) then (
                        $target/child::rdg[@type eq "ppl" or @type eq "ptl"][1]/node()[1]
                    )
                    (: Only tei:rdg[pp, pt] available: stay on the tei:app :)
                    else if ( $target/child::rdg[@type eq "pp" or @type eq "pt"] ) then (
                        $target
                    )
                    (: Nothing to follow: the axis ends here :)
                    else ()
                )
                (: If tei:app has no empty tei:lem
                   follow the normal axis from tei:app :)
                else (
                    $target
                )
            )
            (: If there is no tei:app proceed on normal axis by default :)
            else (
                $target
            )
    )
    return
        (: an empty $first-child terminates the recursion :)
        if ($first-child) then ($first-child, ident:left-branch-axis($first-child)) else ()
};
(:~
 : ident:right-branch-axis()
 : This recursive function describes the pseudo axis "RIGHT-BRANCH AXIS" of a given node. The right-branch axis
 : incorporates all last() nodes of a subtree (aka the right branch) represented by a node and its descendants.
 : If the next node on this axis is a tei:app, the axis is rerouted as follows:
 :   - the last child of the tei:app is a tei:rdg of type "ppl" or "ptl":
 :     continue with the last child node of this tei:rdg
 :   - the last child of the tei:app is a tei:rdg of type "pp" or "pt":
 :     continue with the tei:app itself
 :   - otherwise, if the tei:lem has content: continue with the last child node of the tei:lem
 :   - otherwise: continue with the tei:app itself
 :
 : @param $node the node on the AXIS from which all following nodes, and thus the AXIS itself, are determined
 : @return the nodes representing the AXIS in top-down order ($node itself is not included)
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:right-branch-axis
    ( $node as node() ) as node()* {
    let $last-child := (
        let $target := $node/child::node()[last()]
        return
            (: AXIS CONTROL for the right branch :)
            (: IN CASE there is a tei:app, be ready to change the axis! :)
            if ( $target[self::app] ) then (
                (: If the last child of tei:app is a tei:rdg[ppl, ptl] change the axis to this rdg and
                   its last child() :)
                if ( $target/child::node()[last()][ self::rdg[@type eq "ppl" or @type eq "ptl"] ] ) then (
                    $target/child::node()[last()]/child::node()[last()]
                )
                (: If the last child of tei:app is a tei:rdg[pp, pt] stop here and return the tei:app :)
                else if ( $target/child::node()[last()][ self::rdg[@type eq "pp" or @type eq "pt"] ] ) then (
                    $target
                )
                (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is not empty
                   change the axis to the last child() of tei:lem :)
                else if ( not(empty($target/child::lem/node())) ) then (
                    $target/lem/child::node()[last()]
                )
                (: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is empty
                   follow the normal axis from tei:app :)
                else (
                    $target
                )
            )
            (: If there is no tei:app proceed on normal axis by default :)
            else (
                $target
            )
    )
    return
        (: an empty $last-child terminates the recursion :)
        if ($last-child) then ($last-child, ident:right-branch-axis($last-child)) else ()
};
(:~
 : ident:first-save-node()
 : This function identifies the first save node for a given node().
 : A node on the left-branch axis of $node is "save" if
 :   - its own name is not the name of a blocklevel element (BLE), and
 :   - none of the nodes on its own left-branch axis has the name of a BLE.
 :
 : @param $node the node for which the first save node should be identified
 : @return the first (topmost) save node on the left-branch axis of $node, or the empty sequence if there is none
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:first-save-node
    ( $node as node() ) as node()? {
    let $first := ident:left-branch-axis($node)
        (: the candidate itself must not be a BLE ... :)
        [not ( ident:is-or-are-ble(self::node()/name()) )]
        (: ... and must not lead to a BLE further down its own left branch :)
        [not ( ident:is-or-are-ble( ident:left-branch-axis(self::node())/name() ) )]
    return $first[1]
};
(:~
 : ident:last-save-node()
 : This function identifies the last save node for a given node().
 : A node on the right-branch axis of $node is "save" if
 :   - its own name is not the name of a blocklevel element (BLE), and
 :   - none of the nodes on its own right-branch axis has the name of a BLE.
 :
 : @param $node the node for which the last save node should be identified
 : @return the first (topmost) save node on the right-branch axis of $node, or the empty sequence if there is none
 : @version 2.0 (2018-01-30)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:last-save-node
    ( $node as node() ) as node()? {
    let $last := ident:right-branch-axis($node)
        (: the candidate itself must not be a BLE ... :)
        [not ( ident:is-or-are-ble(self::node()/name()) )]
        (: ... and must not lead to a BLE further down its own right branch :)
        [not ( ident:is-or-are-ble( ident:right-branch-axis(self::node())/name() ) )]
    return $last[1]
};
(:~
 : ident:identify-targets()
 : This function identifies the first and last save node for a given reading (tei:lem and tei:rdg).
 : It also collects the marker set of the reading via markerset:collect-markers() in order to build
 : the opening and closing reading markers.
 :
 : @param $node the reading node to be evaluated
 : @return evaluation report for the node according to the following form
 :   - element named like the reading ("rdg" or "lem") incl. copied attributes
 :     - element "target"[@type = "open"] incl. @gid (generate-id() of the first save node)
 :     - element "target"[@type = "close"] incl. @gid (generate-id() of the last save node)
 :     - element "marker"[@type = "open"] holding the constructed opening markers
 :     - element "marker"[@type = "close"] holding the constructed closing markers in reversed order
 :   If no save node could be identified, the respective @gid is the empty string.
 : @version 2.0 (2018-01-31)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:identify-targets
    ( $node as node() ) as element() {
    let $first := ident:first-save-node($node)
    let $last := ident:last-save-node($node)
    let $marker-set := markerset:collect-markers($node)
    return
        element {$node/name()}{
            $node/@*,
            element {"target"}{
                attribute {"type"}{ "open" },
                attribute {"gid"}{ generate-id($first) }
            },
            element {"target"}{
                attribute {"type"}{ "close" },
                attribute {"gid"}{ generate-id($last) }
            },
            element {"marker"}{
                attribute {"type"}{ "open" },
                markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
            },
            element {"marker"}{
                attribute {"type"}{ "close" },
                reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
            }
        }
};
(:~
 : ident:walk()
 : This recursive function represents the main conversion which adds the reading markers.
 : Processing instructions, comments and whitespace-only text nodes are dropped, a tei:teiHeader is
 : returned unchanged, and every other node is handed to ident:mark-node(). For the relevant
 : tei:rdg and tei:lem elements the evaluation report of ident:identify-targets() is appended to the
 : reading sequence before the node is handed on.
 :
 : @param $nodes nodes to be converted
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return converted nodes
 : @author Uwe Sikora, Michelle Rodzis
 :)
declare function ident:walk
    ( $nodes as node()*, $reading-sequence as item()* ) as item()* {
    for $node in $nodes
    return
        typeswitch($node)
            case processing-instruction() return ()
            case comment() return ()
            case text() return (
                if (normalize-space($node) eq "") then () else (
                    ident:mark-node($node, $reading-sequence)
                )
            )
            case element(teiHeader) return ( $node )
            (: considering all tei:rdg except those of structural variances and
               tei:rdg with the types "v", "typo-correction" and "varying-target" -
               they don't need markers :)
            case element(rdg) return (
                if ( not(
                        $node/parent::app[ @type eq "structural-variance" ] or
                        $node[@type="v" or @type="typo-correction" or @type="varying-target"]
                    )
                ) then (
                    ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
                ) else (
                    ident:mark-node($node, $reading-sequence)
                )
            )
            (: considering all tei:lem except structural-variances :)
            case element(lem) return (
                if ( not($node/parent::app[ @type eq "structural-variance" ]) ) then (
                    ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
                ) else (
                    ident:mark-node($node, $reading-sequence)
                )
            )
            default return (
                ident:mark-node($node, $reading-sequence)
            )
};
(:~
 : ident:mark-node()
 : This function checks whether a given node is an identified first or last save node and, in case of
 : a positive identification, sets the opening markers before and the closing markers after the node.
 : Text nodes are returned as they are; all other named nodes are rebuilt as elements with copied
 : attributes and their children converted by ident:walk(). Nodes without a name that are not text
 : nodes are wrapped in an ERROR element.
 :
 : @param $node node to be checked and, in case of a positive identification, decorated with markers
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return converted node(), preceded and followed by its markers if it is a target
 : @version 2.0 (2018-02-01)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:mark-node
    ( $node as node(), $reading-sequence as item()* ) as node()* {
    let $node-id := generate-id( $node )
    let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
    (: The converted node is the same whether or not markers are set, so it is built once.
       The guard for nameless nodes therefore applies to marked nodes as well. :)
    let $converted :=
        if ( $node[ not(self::text()) ] ) then (
            if ($node[not(name())]) then (
                <ERROR>{$node}</ERROR>
            ) else (
                element{$node/name()}{
                    $node/@*,
                    ident:walk($node/node(), $reading-sequence)
                }
            )
        ) else (
            $node
        )
    return
        if ($in-reading-sequence) then (
            let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
            let $open := $marker[@type = "open"]/node()
            (: closing markers are emitted in the reverse order of their reports :)
            let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
            return (
                $open,
                $converted,
                $close
            )
        ) else (
            $converted
        )
};
(:~
 : ident:mark-text()
 : This function checks whether a given text() node is an identified first or last save node and,
 : in case of a positive identification, sets opening and closing markers before and after the node.
 :
 : @param $node node to be checked and, in case of a positive identification, decorated with markers
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return converted node()
 : @version 2.0 (2018-02-01)
 : @status deprecated. Integrated in ident:mark-node()
 : @author Uwe Sikora
 :)
(:declare function ident:mark-text
($node as node(), $reading-sequence as item()* ) as node()* {
let $node-id := generate-id( $node )
let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
if ($in-reading-sequence) then (
let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
let $open := $marker[@type = "open"]/node()
let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
return(
$open, $node, $close
)
) else ( $node )
};:)
(:~
 : ident:fetch-marker-from-sequence()
 : Helper function to collect the reading markers from a given reading sequence.
 :
 : @param $node-id id to be checked against the target ids (@gid) in the reading sequence
 : @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
 : @return reading markers as node()* for the node associated with $node-id
 : @version 2.0 (2018-02-01)
 : @status working
 : @author Uwe Sikora
 :)
declare function ident:fetch-marker-from-sequence
( $node-id as xs:string, $reading-sequence as item()* ) as node()* {
(: NOTE(review): the closing "};" of this function is not visible in this view - confirm it is present :)
(: visit every item (evaluation report) of the reading sequence :)
for $seq-item in $reading-sequence
(: target children of this report whose @gid equals the requested id :)
let $found := $seq-item/target[@gid = $node-id]
(: string value of @type of each matching target :)
let $found-type := $found/string(@type)
(: marker children of the same report whose @type equals one of the matched target types :)
let $markers := $seq-item/marker[@type = $found-type]
(: skip reports that hold no target for this id :)
where $found
return
$markers