Newer
Older
(:~
: IDENTIFICATION Module ("ident", "http://bdn.edition.de/intermediate_format/identification")
: *******************************************************************************************
: This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements.
: The problem it solves is to identify non-Blocklevel-elements self not containing Blocklevel-elements
: on their first or last decendants path to set textcritical markers required in the printed version of a
: BdN digital edition.
:
: The basic idea is constructing a some kind of first- and last-descendants PATH for reading nodes (tei:lem and tei:rdg) describing
: a save PATH of non-BLE self not including Blocklevel-elements on their own first- or last-decendants paths down the tree.
:
: It includes the helping module "markerset" holding helper functions to collect and construct reading markers
: @version 2.0 (2018-01-29)
: @note This new versions identification algorithm is more flexible and much more configurable as in the old version 1
: @status working
: @author Uwe Sikora
:)
module namespace ident="http://bdn.edition.de/intermediate_format/identification";
import module namespace markerset = "http://bdn.edition.de/intermediate_format/markerset" at "markerset.xqm";
declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Variables #############################:)
(:~
: ident:blocklevel-elements
: Variable defining Blocklevelelements by name
:
: @version 2.0 (2018-01-29)
: @author Uwe Sikora
:)
declare variable $ident:blocklevel-elements := ('titlePage', 'titlePart', 'aligned', 'div', 'list', 'item', 'table', 'row', 'cell', 'head', 'p', 'note');
(:############################# Modules Functions #############################:)
(:~
: ident:in-sequence()
: This function checks if nodes are includes in a sequence of nodes
:
: @param $values the nodes to check against the sequence
: @param $sequence a sequence of AtomicTypes
: @return xs:boolean ('true' else 'false')
:
: @version 2.0 (2018-01-29)
: @status working
: @author Uwe Sikora
:)
declare function ident:in-sequence
( $values as xs:anyAtomicType* , $sequence as xs:anyAtomicType*) as xs:boolean {
$values = $sequence
};
(:~
: ident:is-or-are-ble()
: This function checks if nodes are Blocklevelelements
:
: @param $values the nodes to check against the sequence
: @return xs:boolean ('true' else 'false')
:
: @version 2.0 (2018-01-29)
: @note derived function from ident:in-sequence
: @status working
: @author Uwe Sikora
:)
declare function ident:is-or-are-ble
( $values as xs:anyAtomicType* ) as xs:boolean {
$values = $ident:blocklevel-elements
};
: ident:left-branch-axis()
: This recursive function describes a pseudo axis "LEFT-BRANCH AXIS" of a given node. The left-branch axis
: incorporates all first nodes of a subtree (aka the left branch) represented by a node and its descendants.
: In case one of the nodes on this axis is self a tei:app the axis is rerouted from the tei:app downwards the branch
: according to defined parameters.
: @param $node the nodes on the AXIS from where all following nodes and thus the AXIS itself is defined
: @return set of node() representing the AXIS
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:left-branch-axis
let $first-child := (
let $target := $node/child::node()[1]
return
(: AXIS CONTROLL of the left-branch :)
(: IN CASE there is an tei:app, be ready to change the axis to
the first tei:rdg[ppl, ptl] and its first child::node()! :)
if ( $target[self::app] ) then (
(: If tei:app has an empty tei:lem change the axis to tei:lems last child() :)
if ( empty($target/child::lem/node()) ) then (
$target/child::rdg[@type eq "ppl" or @type eq "ptl"][1]/node()[1]
)
(: If tei:app has no empty tei:lem
follow the normal axis from tei:app :)
else (
$target
)
)
(: If there is no tei:app proceed on normal axis by default :)
else (
$target
)
)
return
if($first-child) then ($first-child, ident:left-branch-axis($first-child)) else ()
: ident:right-branch-axis()
: This recursive function describes a pseudo axis "RIGHT-BRANCH AXIS" of a given node. The right-branch axis
: incorporates all last() nodes of a subtree (aka the right branch) represented by a node and its descendants.
: In case one of the nodes on this axis is self a tei:app the axis is rerouted from the tei:app downwards the branch
: according to defined parameters.
: @param $node the nodes in the AXIS from where all following nodes and thus the AXIS itsel is defined
: @return set of node() representing the AXIS
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:right-branch-axis
let $last-child := (
let $target := $node/child::node()[last()]
return
(: AXIS CONTROLL for the right-branch :)
(: IN CASE there is an tei:app, be ready to change the axis ! :)
if ( $target[self::app] ) then (
(: If tei:apps last child is a tei:rdg[ppl, ptl] change the axis to this rdg and
its last child() :)
if ( $target/child::node()[last()][ self::rdg[@type eq "ppl" or @type eq "ptl"] ] ) then (
$target/child::node()[last()]/child::node()[last()]
)
(: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is not empty
change the axis to tei:lems last child() :)
else if ( not(empty($target/child::lem/node())) ) then (
$target/lem/child::node()[last()]
)
(: If tei:app has no last child tei:rdg[ppl, ptl] and its tei:lem is empty
follow the normal axis from tei:app :)
else (
$target
)
)
(: If there is no tei:app proceed on normal axis by default :)
else (
$target
)
)
return
if($last-child) then ($last-child, ident:right-branch-axis($last-child)) else ()
(:~
: ident:first-save-node()
: This function identifies the first-save node for a given node()
:
: @param $node the node of which the first save node should be identified
: @return the first save node of a defined set of save nodes
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:first-save-node
let $first := ident:left-branch-axis($node)
[not ( ident:is-or-are-ble(self::node()/name()) )]
[not ( ident:is-or-are-ble( ident:left-branch-axis(self::node())/name() ) )]
return $first[1]
};
(:~
: ident:last-save-node()
: This function identifies the last-save node for a given node()
:
: @param $node the node of which the last save node should be identified
: @return the last save node of a defined set of save nodes
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:last-save-node
let $last := ident:right-branch-axis($node)
[not ( ident:is-or-are-ble(self::node()/name()) )]
[not ( ident:is-or-are-ble( ident:right-branch-axis(self::node())/name() ) )]
return $last[1]
};
(:~
: ident:identify-targets()
: This function identifies the first and last save node for a given reading (tei:lem and tei:rdg)
: It also collect the sibling readings as shortcuts (name and attributes) to build a set
: of reading markers for opening and closing Markers
:
: @param $node the reading nodegoing to be evaluated
: @return evaluation report for the node acording to the following form
: - element "rdg" or "lem" incl. copied attributes
: - element "target"[@type = "open"] incl. @id (generated)
: - element "target"[@type = "close"] incl. @id (generated)
: - element "marker"[@type = "open"] incl. @id (generated)
: - element "marker"[@type = "close"] incl. @id (generated)
:
: @version 2.0 (2018-01-31)
: @status working
: @author Uwe Sikora
:)
declare function ident:identify-targets
let $first := ident:first-save-node($node)
let $last := ident:last-save-node($node)
let $marker-set := markerset:collect-markers($node)
let $markers := markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
return
element {$node/name()}{
$node/@*,
element {"target"}{
attribute {"type"}{ "open" },
attribute {"gid"}{ generate-id($first) }(:,
$first:)
},
element {"target"}{
attribute {"type"}{ "close" },
attribute {"gid"}{ generate-id($last) }(:,
$last:)
},
element {"marker"}{
attribute {"type"}{ "open" },
markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
},
element {"marker"}{
attribute {"type"}{ "close" },
reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
}
}
};
(:~
: ident:walk()
: This recursive function represents the main conversion which adds the reading markers
: for tei:lem and tei:rdg nodes
:
: @param $nodes nodes to be converted
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return converted node
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
( $nodes as node()*, $reading-sequence as item()* ) as item()* {
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
for $node in $nodes
return
typeswitch($node)
case processing-instruction() return ()
case text() return (
if (normalize-space($node) eq "") then () else (
ident:mark-node($node, $reading-sequence)
)
)
case element(rdg) return (
if ( not($node/parent::app[ @type eq "structural-variance" ]) ) then (
let $identified-targets := ident:identify-targets($node)
return ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
) else (
ident:mark-node($node, $reading-sequence)
)
)
case element(lem) return (
if ( not($node/parent::app[ @type eq "structural-variance" ]) ) then (
let $identified-targets := ident:identify-targets($node)
return ident:mark-node( $node, ($reading-sequence, ident:identify-targets($node)) )
) else (
ident:mark-node($node, $reading-sequence)
)
)
default return (
ident:mark-node($node, $reading-sequence)
)
};
(:~
: ident:mark-node()
: This function checks if a given node is a identified first or last save node
: and sets in case of positive identification sets opening and closing markers before and after the node
:
: @param $nodes nodes to be checked and in case of positive identification decorated with markers
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return converted node()
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function ident:mark-node
( $node as node(), $reading-sequence as item()* ) as node()* {
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
let $node-id := generate-id( $node )
let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
return
if ($in-reading-sequence) then (
let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
let $open := $marker[@type = "open"]/node()
let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
return(
$open,
if ( $node[ not(self::text()) ] ) then (
element{$node/name()}{
$node/@*,
ident:walk($node/node(), $reading-sequence)
}
) else (
$node
),
$close
)
) else (
if ( $node[ not(self::text()) ] ) then (
element{$node/name()}{
$node/@*,
ident:walk($node/node(), $reading-sequence)
}
) else (
$node
)
)
};
(:~
: ident:mark-text()
: This function checks if a given text() is a identified first or last save node
: and sets in case of positive identification sets opening and closing markers before and after the node
:
: @param $nodes nodes to be checked and in case of positive identification decorated with markers
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return converted node()
:
: @version 2.0 (2018-02-01)
: @status deprecated. integrated in ident:mark-node()
: @author Uwe Sikora
:)
(:declare function ident:mark-text
($node as node(), $reading-sequence as item()* ) as node()* {
let $node-id := generate-id( $node )
let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
return
if ($in-reading-sequence) then (
let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
let $open := $marker[@type = "open"]/node()
let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
return(
$open, $node, $close
)
) else ( $node )
};:)
(:~
: ident:fetch-marker-from-sequence()
: Helperfunction to collect the reading markers from a given reading sequence
:
: @param $node-id id to be checked against the reading-sequences target-ids
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return reading markers as node()* for the node associated with node-id
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function ident:fetch-marker-from-sequence
($node-id as xs:string, $reading-sequence as item()* ) as node()* {
for $seq-item in $reading-sequence
let $found := $seq-item/target[@gid = $node-id]
let $found-type := $found/string(@type)
let $markers := $seq-item/marker[@type = $found-type]
where $found
return
$markers
};
(:~
: ident:identify-unit-test()
: Some kind of test-unit-function to eval the main identification functionality of this module on all tei:lem and tei:readings of a given xml-tree
:
: @param $nodes xml-tree to be tested
: @return test report for each tei:lem and tei:reading as node()*
:
: @version 2.0 (2018-02-01)
: @status working
: @note meant to test the identification algorithm
: @author Uwe Sikora
:)
declare function ident:identify-unit-test
for $node at $nr in $nodes//node()[self::lem or self::rdg]
let $identified-targets := ident:identify-targets($node)
return
element{"UTEST"}{
attribute {"n"}{$nr},
element {"SELF"} {$node},
$identified-targets
}
};