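(: "Newer" / "Older": navigation residue from the web view this file was copied from; not part of the module. :)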
(:~
: IDENTIFICATION Module ("ident", "http://bdn.edition.de/intermediate_format/identification")
: *******************************************************************************************
: This module defines functions and variables to set reading markers in tei:lem or tei:rdg elements.
: The problem it solves is identifying non-block-level elements that do not themselves contain block-level elements
: on their first- or last-descendants path, so that the text-critical markers required in the printed version of a
: BdN digital edition can be set.
:
: The basic idea is to construct a kind of first- and last-descendants PATH for reading nodes (tei:lem and tei:rdg), describing
: a safe PATH of non-BLEs that do not include block-level elements on their own first- or last-descendants paths down the tree.
:
: It includes the helper module "markerset", which holds helper functions to collect and construct reading markers.
: @version 2.0 (2018-01-29)
: @note The identification algorithm of this new version is more flexible and much more configurable than that of the old version 1
: @status working
: @author Uwe Sikora
:)
module namespace ident="http://bdn.edition.de/intermediate_format/identification";
import module namespace markerset = "http://bdn.edition.de/intermediate_format/markerset" at "markerset.xqm";
declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Variables #############################:)
(:~
: ident:blocklevel-elements
: Names of the elements this module treats as block-level elements (BLE).
: The names are compared as plain strings against the result of name().
:
: @version 2.0 (2018-01-29)
: @author Uwe Sikora
:)
declare variable $ident:blocklevel-elements := (
    'titlePage', 'titlePart', 'aligned',
    'div', 'list', 'item',
    'table', 'row', 'cell',
    'head', 'p', 'note'
);
(:############################# Modules Functions #############################:)
(:~
: ident:in-sequence()
: Tests whether at least one of the given atomic values occurs in a sequence of atomic values.
:
: @param $values the atomic values to look for
: @param $sequence the atomic values to search in
: @return true() if any item of $values equals any item of $sequence, otherwise false()
:         (so false() whenever either argument is empty)
:
: @version 2.0 (2018-01-29)
: @status working
: @author Uwe Sikora
:)
declare function ident:in-sequence
    ( $values as xs:anyAtomicType*, $sequence as xs:anyAtomicType* ) as xs:boolean {
    some $candidate in $values satisfies $candidate = $sequence
};
(:~
: ident:is-or-are-ble()
: Tests whether at least one of the given values is the name of a block-level element,
: i.e. occurs in $ident:blocklevel-elements.
:
: @param $values the atomic values to test (callers in this module pass element names obtained via name())
: @return true() if any value is listed in $ident:blocklevel-elements, otherwise false()
:
: @version 2.0 (2018-01-29)
: @note derived function from ident:in-sequence
: @status working
: @author Uwe Sikora
:)
declare function ident:is-or-are-ble
    ( $values as xs:anyAtomicType* ) as xs:boolean {
    ident:in-sequence($values, $ident:blocklevel-elements)
};
(:~
: ident:left-nodes-path()
: This recursive function describes the so-called left nodes PATH: starting at $node it
: repeatedly steps to the first child node and returns every node visited on the way down.
: tei:app elements get special treatment (see the inline comments).
:
: NOTE(review): the parameter list and the closing brace of this declaration were missing
: from this copy of the file. The signature below is reconstructed from the function body
: and its call sites, using deliberately permissive types - confirm against the upstream source.
:
: @param $node the node(s) from which the path is followed downwards
: @return the nodes on the left path in top-down order; empty if $node has no child nodes
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:left-nodes-path
    ( $node as node()* ) as node()* {
    let $first-child := (
        let $target := $node/child::node()[1]
        return
            (: PATH CONTROL for the left-nodes-path :)
            if ( $target[self::app] ) then (
                (: A tei:app whose tei:lem has no content (or that has no tei:lem at all) is skipped:
                   the path continues at the first child node of the first tei:rdg[@type = ppl | ptl].
                   If there is no such tei:rdg the path ends here. :)
                if ( empty($target/child::lem/node()) ) then (
                    $target/child::rdg[@type eq "ppl" or @type eq "ptl"][1]/node()[1]
                )
                (: A tei:app with a non-empty tei:lem stays on the normal path. :)
                else (
                    $target
                )
            )
            (: No tei:app: proceed on the normal path. :)
            else (
                $target
            )
    )
    return
        if ( $first-child ) then ( $first-child, ident:left-nodes-path($first-child) ) else ()
};
(:~
: ident:right-nodes-path()
: This recursive function describes the so-called right nodes PATH: starting at $node it
: repeatedly steps to the last child node and returns every node visited on the way down.
: tei:app elements get special treatment (see the inline comments).
:
: NOTE(review): the parameter list and the closing brace of this declaration were missing
: from this copy of the file, and a run of stray line numbers had been pasted into the body.
: The signature below is reconstructed from the function body and its call sites, using
: deliberately permissive types - confirm against the upstream source.
:
: @param $node the node(s) from which the path is followed downwards
: @return the nodes on the right path in top-down order; empty if $node has no child nodes
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:right-nodes-path
    ( $node as node()* ) as node()* {
    let $last-child := (
        let $target := $node/child::node()[last()]
        return
            (: PATH CONTROL for the right-nodes-path :)
            if ( $target[self::app] ) then (
                (: The last child of tei:app is a tei:rdg[@type = ppl | ptl]:
                   skip the app and continue at that rdg's last child node. :)
                if ( $target/child::node()[last()][ self::rdg[@type eq "ppl" or @type eq "ptl"] ] ) then (
                    $target/child::node()[last()]/child::node()[last()]
                )
                (: Otherwise, if tei:lem is not empty: skip the app and continue at lem's last child node. :)
                else if ( not(empty($target/child::lem/node())) ) then (
                    $target/lem/child::node()[last()]
                )
                (: Otherwise (no trailing ppl/ptl rdg, empty lem): stay on the normal path. :)
                else (
                    $target
                )
            )
            (: No tei:app: proceed on the normal path. :)
            else (
                $target
            )
    )
    return
        if ( $last-child ) then ( $last-child, ident:right-nodes-path($last-child) ) else ()
};
(:~
: ident:first-save-node()
: Identifies the first safe node for a given node: the topmost node on the node's left nodes PATH
: that is not itself a block-level element and has no block-level element on its own left nodes PATH.
:
: NOTE(review): the parameter list of this declaration was missing from this copy of the file.
: The signature below is reconstructed from the function body and its call site in
: ident:identify-targets - confirm against the upstream source.
:
: @param $node the node whose first safe node should be identified
: @return the first safe node, or the empty sequence if the left nodes PATH contains none
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:first-save-node
    ( $node as node()* ) as node()? {
    let $first := ident:left-nodes-path($node)
        (: the candidate itself must not be a block-level element ... :)
        [not ( ident:is-or-are-ble(self::node()/name()) )]
        (: ... and neither may any node further down its own left path :)
        [not ( ident:is-or-are-ble( ident:left-nodes-path(self::node())/name() ) )]
    return $first[1]
};
(:~
: ident:last-save-node()
: Identifies the last safe node for a given node: the topmost node on the node's right nodes PATH
: that is not itself a block-level element and has no block-level element on its own right nodes PATH.
:
: NOTE(review): the parameter list of this declaration was missing from this copy of the file.
: The signature below is reconstructed from the function body and its call site in
: ident:identify-targets - confirm against the upstream source.
:
: @param $node the node whose last safe node should be identified
: @return the last safe node, or the empty sequence if the right nodes PATH contains none
:
: @version 2.0 (2018-01-30)
: @status working
: @author Uwe Sikora
:)
declare function ident:last-save-node
    ( $node as node()* ) as node()? {
    let $last := ident:right-nodes-path($node)
        (: the candidate itself must not be a block-level element ... :)
        [not ( ident:is-or-are-ble(self::node()/name()) )]
        (: ... and neither may any node further down its own right path :)
        [not ( ident:is-or-are-ble( ident:right-nodes-path(self::node())/name() ) )]
    (: [1] is the topmost match, because the right path is returned in top-down order :)
    return $last[1]
};
(:~
: ident:identify-targets()
: Identifies the first and last safe node of a given reading (tei:lem or tei:rdg) and builds
: the opening and closing reading markers from the marker set that markerset:collect-markers()
: returns for the reading.
:
: NOTE(review): the parameter list of this declaration was missing from this copy of the file.
: The signature below is reconstructed from the function body and its call sites
: - confirm against the upstream source.
:
: @param $node the reading node to be evaluated
: @return evaluation report for the node of the following form
:         - element named like $node, carrying copies of its attributes, containing
:         - element "target"[@type = "open"] with @gid = generate-id() of the first safe node
:         - element "target"[@type = "close"] with @gid = generate-id() of the last safe node
:         - element "marker"[@type = "open"] holding the constructed opening markers
:         - element "marker"[@type = "close"] holding the constructed closing markers in reversed order
:         If no safe node exists, the corresponding @gid is the empty string.
:
: @version 2.0 (2018-01-31)
: @status working
: @author Uwe Sikora
:)
declare function ident:identify-targets
    ( $node as node() ) as element() {
    let $first := ident:first-save-node($node)
    let $last := ident:last-save-node($node)
    let $marker-set := markerset:collect-markers($node)
    return
        element { $node/name() } {
            $node/@*,
            element { "target" } {
                attribute { "type" } { "open" },
                attribute { "gid" } { generate-id($first) }
            },
            element { "target" } {
                attribute { "type" } { "close" },
                attribute { "gid" } { generate-id($last) }
            },
            element { "marker" } {
                attribute { "type" } { "open" },
                markerset:construct-marker-from-markerset("rdgMarker", "open", $marker-set)
            },
            element { "marker" } {
                attribute { "type" } { "close" },
                reverse( markerset:construct-marker-from-markerset("rdgMarker", "close", $marker-set) )
            }
        }
};
(:~
: ident:walk()
: This recursive function represents the main conversion, which adds the reading markers
: for tei:lem and tei:rdg nodes.
:
: Per node:
: - processing instructions and whitespace-only text nodes are dropped
: - for tei:lem and tei:rdg whose parent is not tei:app[@type = "structural-variance"], the
:   evaluation report from ident:identify-targets() is appended to the reading sequence
:   before the node is handed to ident:mark-node()
: - every other node is handed to ident:mark-node() with the unchanged reading sequence
:
: @param $nodes nodes to be converted
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return converted nodes
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function ident:walk
    ( $nodes as node()*, $reading-sequence as item()* ) as item()* {
    for $node in $nodes
    return
        typeswitch ($node)
            case processing-instruction() return ()
            case text() return (
                if ( normalize-space($node) eq "" ) then () else (
                    ident:mark-node($node, $reading-sequence)
                )
            )
            case element() return (
                (: only readings outside structural-variance apparatus entries contribute markers :)
                let $is-marked-reading := exists(
                    $node[self::rdg or self::lem][ not(parent::app[ @type eq "structural-variance" ]) ]
                )
                let $extended-sequence :=
                    if ( $is-marked-reading )
                    then ( $reading-sequence, ident:identify-targets($node) )
                    else $reading-sequence
                return ident:mark-node($node, $extended-sequence)
            )
            default return (
                ident:mark-node($node, $reading-sequence)
            )
};
(:~
: ident:mark-node()
: Checks whether a given node is an identified first or last safe node and, if so, emits the
: matching reading markers before and after the converted node.
:
: The node itself is converted as follows:
: - elements are rebuilt with their attributes, and their children are run through ident:walk()
: - document nodes are rebuilt around their converted children
: - all other nodes (text, comments, ...) are returned unchanged
:
: @param $node node to be checked and, in case of positive identification, decorated with markers
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the node's context
: @return the converted node, surrounded by its opening and closing markers where applicable
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function ident:mark-node
    ( $node as node(), $reading-sequence as item()* ) as node()* {
    let $node-id := generate-id( $node )
    let $is-target := exists( $reading-sequence//target[@gid eq $node-id] )
    let $converted :=
        typeswitch ($node)
            case element() return
                element { $node/name() } {
                    $node/@*,
                    ident:walk($node/node(), $reading-sequence)
                }
            (: nodes without a name must not reach the element constructor above:
               an empty element name raises a dynamic error :)
            case document-node() return
                document { ident:walk($node/node(), $reading-sequence) }
            default return $node
    return
        if ( $is-target ) then (
            let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
            let $open := $marker[@type = "open"]/node()
            let $close := ( for $item in reverse($marker[@type = "close"]) return $item/node() )
            return ( $open, $converted, $close )
        ) else (
            $converted
        )
};
(:~
: ident:mark-text()
: This function checks if a given text() is a identified first or last save node
: and sets in case of positive identification sets opening and closing markers before and after the node
:
: @param $nodes nodes to be checked and in case of positive identification decorated with markers
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the nodes' context
: @return converted node()
:
: @version 2.0 (2018-02-01)
: @status deprecated. integrated in ident:mark-node()
: @author Uwe Sikora
:)
(:declare function ident:mark-text
($node as node(), $reading-sequence as item()* ) as node()* {
let $node-id := generate-id( $node )
let $in-reading-sequence := $reading-sequence//target[@gid eq $node-id]
return
if ($in-reading-sequence) then (
let $marker := ident:fetch-marker-from-sequence($node-id, $reading-sequence)
let $open := $marker[@type = "open"]/node()
let $close := (for $item in reverse($marker[@type = "close"]) return $item/node())
return(
$open, $node, $close
)
) else ( $node )
};:)
(:~
: ident:fetch-marker-from-sequence()
: Helper function collecting the reading markers for a node id from a given reading sequence.
: For every evaluation report that has a "target" child with a matching @gid, the "marker"
: children whose @type equals the @type of a matching target are returned.
:
: @param $node-id id to be checked against the target ids of the reading sequence
: @param $reading-sequence sequence holding the evaluation reports of the relevant readings in the node's context
: @return the "marker" elements associated with $node-id; empty if no report matches
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function ident:fetch-marker-from-sequence
    ( $node-id as xs:string, $reading-sequence as item()* ) as node()* {
    for $report in $reading-sequence[ target[@gid = $node-id] ]
    let $matched-types := $report/target[@gid = $node-id]/string(@type)
    return $report/marker[@type = $matched-types]
};
(:~
: ident:identify-unit-test()
: A kind of unit-test function that evaluates the main identification functionality of this module
: on all tei:lem and tei:rdg descendants of a given XML tree.
:
: NOTE(review): the parameter list of this declaration was missing from this copy of the file.
: The signature below is reconstructed from the function body - confirm against the upstream source.
:
: @param $nodes XML tree to be tested
: @return one "UTEST" element per tei:lem / tei:rdg, carrying the running number in @n, a copy of the
:         reading inside a "SELF" element, and the evaluation report from ident:identify-targets()
:
: @version 2.0 (2018-02-01)
: @status working
: @note meant to test the identification algorithm
: @author Uwe Sikora
:)
declare function ident:identify-unit-test
    ( $nodes as node()* ) as element()* {
    for $node at $nr in $nodes//node()[self::lem or self::rdg]
    let $identified-targets := ident:identify-targets($node)
    return
        element { "UTEST" } {
            attribute { "n" } { $nr },
            element { "SELF" } { $node },
            $identified-targets
        }
};