Newer
Older
(:~
 : PREPROCESSING Module ("pre", "http://bdn-edition.de/intermediate_format/preprocessing")
 : *******************************************************************************************
 : This module contains the preprocessing routines for the intermediate format.
 :
 : It imports the whitespace handling helper module to perform whitespace handling during the preprocessing.
 : @version 2.0 (2018-01-29)
 : @status working
 : @author Uwe Sikora
 : @author Michelle Weidling
 :)
module namespace pre="http://bdn-edition.de/intermediate_format/preprocessing";
import module namespace whitespace = "http://bdn-edition.de/intermediate_format/whitespace_handling" at "whitespace-handling.xqm";
import module namespace console="http://exist-db.org/xquery/console";
declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:)
(:~
 : pre:preprocessing-textNode
 : Preprocessing function which converts each non-whitespace text() node into an
 : XML element "textNode". This function is an experimental fallback solution and
 : not the main preprocessing routine.
 :
 : Handling per node kind:
 :   - processing instructions are dropped
 :   - whitespace-only text nodes are dropped, all other text nodes are wrapped
 :   - TEI is copied, its children are converted and the collected editorial
 :     notes are appended as last child
 :   - lem and rdg are copied and receive an additional generated @id
 :   - note[@type eq "editorial-commentary"] is dropped in place
 :   - every other node is copied with converted children
 :
 : @param $nodes the nodes to be converted
 : @return item()* representing the converted nodes
 : @version 1.2 (2017-10-15)
 : @status working
 : @author Uwe Sikora
 :)
declare function pre:preprocessing-textNode
    ($nodes as node()*) as item()* {
    for $node in $nodes
    return
        typeswitch($node)
            case processing-instruction() return ()
            case text() return (
                (: whitespace-only text carries no content for this conversion :)
                if (normalize-space($node) eq "") then () else (
                    element {"textNode"} {
                        (:attribute {"interformId"}{ generate-id($node) },:)
                        $node
                    }
                )
            )
            case element(TEI) return (
                element{$node/name()}{
                    $node/@*,
                    pre:preprocessing-textNode($node/node()),
                    (: This function has no $replace-whitespace parameter, so the
                       flag is passed explicitly. false() leaves the text of the
                       notes untouched, matching this routine, which never
                       rewrites text content.
                       NOTE(review): confirm false() is the intended value. :)
                    pre:get-editorial-notes($node, false())
                }
            )
            case element(lem) return (
                element{$node/name()}{
                    $node/@*,
                    attribute {"id"}{ generate-id($node)},
                    pre:preprocessing-textNode($node/node())
                }
            )
            case element(rdg) return (
                element{$node/name()}{
                    $node/@*,
                    attribute {"id"}{ generate-id($node)},
                    pre:preprocessing-textNode($node/node())
                }
            )
            case element(note) return (
                (: editorial notes are emitted once via pre:get-editorial-notes
                   in the TEI branch, so they are skipped at their original
                   position :)
                if ($node[@type eq "editorial-commentary"]) then (
                ) else (
                    element{$node/name()}{
                        $node/@*,
                        pre:preprocessing-textNode($node/node())
                    }
                )
            )
            default return (
                (: NOTE(review): nodes without a name (e.g. comment()) would make
                   this computed constructor fail; confirm they cannot occur in
                   the input. :)
                element{$node/name()}{
                    $node/@*,
                    pre:preprocessing-textNode($node/node())
                }
            )
};
(:~
 : pre:default-element
 : Default element constructor for the preprocessing conversion.
 : It creates a new element carrying the name of $node, optionally adds a
 : break-after="yes" attribute, and uses the already converted child nodes handed
 : over in $recursive-function as content.
 :
 : break-after="yes" is set when one of the following holds:
 :   - the node directly following $node is whitespace-only and the next sibling
 :     element is one of ref, app, hi, bibl, foreign, choice, milestone,
 :     persName, index, seg or ptr
 :   - $node is a milestone that is directly followed by a text node
 :   - $node is a ptr inside an rdg of type "v", "pp" or "pt"
 :
 : NOTE(review): the attributes of $node are not copied by this constructor;
 : confirm that this is intended.
 :
 : @param $node the node whose name is used for the new element
 : @param $recursive-function the converted content of $node, i.e. the result of the recursive conversion call
 : @return element()* representing the converted node
 : @version 1.0 (2018-01-31)
 : @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO)
 : @status working
 : @author Uwe Sikora
 :)
declare function pre:default-element
    ( $node as node(), $recursive-function as node()* ) as element()* {
    let $following-node := $node/following-sibling::node()[1]
    let $following-sibling := $node/following-sibling::*[1]
    return
        element{$node/name()}{
            (if(
                (
                    $following-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
                    and $following-sibling[self::ref or self::app or self::hi or self::bibl
                        or self::foreign or self::choice or self::milestone or self::persName
                        or self::index or self::seg or self::ptr]
                )
                or ($node[self::milestone] and $following-node[self::text()])
                (: ptr in the critical apparatus produce the siglum '[E]' which should
                   be followed by a whitespace :)
                or ($node[self::ptr] and $node/ancestor::rdg[@type = ("v", "pp", "pt")])
            )
            then
                attribute {"break-after"}{"yes"}
            else ()),
            $recursive-function
        }
};
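(:~
 : pre:preprocessing
 : Main preprocessing function.
 :
 : @param $nodes the nodes to be converted
 : @param $replace-whitespace if true, text nodes are passed through whitespace:text() instead of being copied unchanged
 : @return item()* representing the converted node
 : @version 2.0 (2018-02-01)
 : @status working
 : @author Uwe Sikora
 :)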
(: Main conversion routine: iterates over $nodes and dispatches on the node kind
   with a typeswitch; most branches rebuild the element and recurse into
   $node/node() with the same $replace-whitespace flag.
   NOTE(review): this declaration is structurally incomplete as it stands. The
   return type and opening brace after the parameter list, several case headers,
   many closing braces and parentheses and the final terminator are missing, and
   stray non-code lines ("usikora", "committed") are interleaved. Restore the
   function from version control before relying on it. :)
declare function pre:preprocessing
($nodes as node()*, $replace-whitespace as xs:boolean)
for $node in $nodes
return
typeswitch($node)
(: processing instructions produce no output :)
case processing-instruction() return ()
(: NOTE(review): a case header seems to be missing here, presumably for text()
   nodes, since this branch either hands $node to whitespace:text or emits it
   unchanged; confirm. :)
if($replace-whitespace) then (
(: whitespace:text is defined in the imported whitespace-handling module; its
   exact effect is not visible here, verify there :)
whitespace:text($node, " ")
)
else (
$node
)
(: TEI: copy attributes, convert the children and append the collected editorial
   notes (see pre:get-editorial-notes) after them :)
case element(TEI) return (
element{$node/name()}{
$node/@*,
pre:preprocessing($node/node(), $replace-whitespace),
pre:get-editorial-notes($node, $replace-whitespace)
(: NOTE(review): the closing tokens of the TEI branch and a case header are
   missing here; the tests on descendant::div and parent::div below suggest this
   is the div branch; confirm. :)
(: type "section-group": the wrapper element is not rebuilt, only its converted
   children are emitted :)
if ($node[@type = "section-group"]) then (
pre:preprocessing($node/node(), $replace-whitespace)
(: any other @type: copy the element and add a generated id :)
else if($node[@type]) then
element{$node/name()}{
$node/@*,
attribute id {generate-id($node)},
pre:preprocessing($node/node(), $replace-whitespace)
(: no @type and no nested div: mark the element as "single-div".
   NOTE(review): as written, $node/@* is not copied in this branch; confirm. :)
else if ($node[not(@type)][not(descendant::div)]) then
element{$node/name()}{
attribute type {"single-div"},
attribute id {generate-id($node)},
pre:preprocessing($node/node(), $replace-whitespace)
(: NOTE(review): presumably the final else of the chain above; the else keyword
   is missing. :)
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
(: app directly below a "section-group" div is rebuilt as type "standalone" with
   a generated id; any other app goes through the default constructor.
   NOTE(review): as written, $node/@* is not copied in the standalone branch. :)
case element(app) return
if($node/parent::div[@type = "section-group"]) then
element{$node/name()}{
attribute type {"standalone"},
attribute id {generate-id($node)},
pre:preprocessing($node/node(), $replace-whitespace)
else
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
(: lem: copy attributes and add a generated id :)
case element(lem) return (
element{$node/name()}{
$node/@*,
attribute {"id"}{ generate-id($node)},
pre:preprocessing($node/node(), $replace-whitespace)
(: NOTE(review): the closing tokens of the lem branch and a case header are
   missing here; the identical constructor in pre:preprocessing-textNode belongs
   to element(rdg), so this is presumably the rdg branch; confirm. :)
element{$node/name()}{
$node/@*,
attribute {"id"}{ generate-id($node)},
pre:preprocessing($node/node(), $replace-whitespace)
}
(: NOTE(review): a case header is missing here, presumably element(note), since
   the test below filters on the "editorial-commentary" type. Nodes with that
   type produce no output at this position; all others use the default
   constructor. :)
if ( $node[@type != "editorial-commentary"] or $node[ not(@type) ] ) then (
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
) else ( )
(: pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) ):)

usikora
committed
(: NOTE(review): the case header of this branch is missing and the element it
   handles cannot be determined from this view. The branch copies the element
   with its attributes and adds break information derived from the directly
   neighbouring nodes. :)
let $preceding-sibling := $node/preceding-sibling::node()[1]
let $following-sibling := $node/following-sibling::node()[1]
return
element {$node/name()}{
$node/@*,

usikora
committed
(: NOTE(review): the opening of the if condition is missing here.
   break="no": both neighbours are non-empty text nodes, the preceding one does
   not end with a space and the following one does not start with a space :)
( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() )
and
( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() )
) then ( attribute {"break"}{"no"} )
(: whitespace-only neighbours on both sides: break before and after :)
else if (
( $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )

usikora
committed
and
( $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )

usikora
committed
) then (
attribute {"break-before"}{"yes"},
attribute {"break-after"}{"yes"}
)
(: whitespace-only neighbour only before the node :)
else if (
$preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-before"}{"yes"}
)
(: whitespace-only neighbour only after the node :)
else if (
$following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-after"}{"yes"}

usikora
committed
)
else ( )
(: NOTE(review): the closing tokens of the previous branch and a case header are
   missing here; the 'hi' element built below suggests element(hi); confirm. :)
(: right- or center-aligned content is renamed to an "aligned" element :)
if($node[@rend = ('right-aligned', 'center-aligned')]) then(
element {'aligned'} {
$node/@*,
pre:preprocessing($node/node(), $replace-whitespace)
(: spaced-out content: the characters of the text are joined with "@" between
   them. NOTE(review): $node/text() is passed to string-length and substring,
   which accept at most one item, so more than one text child would raise a type
   error; confirm that cannot occur. :)
else if($node[@rend = 'spaced-out']) then
element {'hi'} {
$node/@*,
let $text := $node/text()
let $str-length := string-length($text)
let $spaced-out :=
for $iii in 1 to $str-length return
(: every character except the last one is followed by the "@" marker :)
if(not($iii = $str-length)) then
(substring($text, $iii, 1), "@")
else
substring($text, $iii, 1)
return string-join($spaced-out, '')
(: NOTE(review): presumably the final else of the chain above; the else keyword
   is missing. :)
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
(: seg: types 'item', 'head' and 'row' are renamed to an element of that name,
   keeping all attributes except @type :)
case element(seg) return (
if($node[@type = 'item']) then(
element {'item'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node(), $replace-whitespace)
else if($node[@type = 'head']) then(
element {'head'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node(), $replace-whitespace)
else if($node[@type = 'row']) then(
element {'row'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node(), $replace-whitespace)
(: NOTE(review): the three calls below have lost their surrounding else and
   default structure; which condition selects which call cannot be recovered
   from this view. :)
pre:preprocessing($node/node(), $replace-whitespace)
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
pre:default-element( $node, pre:preprocessing($node/node(), $replace-whitespace) )
(:~
 : Collects every editorial note, i.e. each note[@type eq "editorial-commentary"]
 : found below $node, converts it and returns the results wrapped in a single
 : "editorial-notes" element.
 :
 : @param $node the node(s) whose descendants are searched for editorial notes
 : @param $replace-whitespace forwarded unchanged to pre:preprocessing when the children of a note are converted
 : @return one editorial-notes element; it is empty when no editorial note exists
 : @author Michelle Weidling
 :)
declare function pre:get-editorial-notes($node as node()*, $replace-whitespace)
    as element(editorial-notes) {
    let $commentary-notes := $node//note[@type eq "editorial-commentary"]
    let $converted-notes :=
        for $commentary in $commentary-notes
        return
            pre:default-element(
                $commentary,
                pre:preprocessing($commentary/node(), $replace-whitespace)
            )
    return
        element {"editorial-notes"} { $converted-notes }
};
(:~
 : Splits the content of a text node into its individual characters.
 :
 : @param $text the text node to split
 : @return one single-character string per character of $text, in original order
 :)
declare function pre:tokenize-by-character($text as text()) as xs:string* {
    for $codepoint in string-to-codepoints($text)
    return codepoints-to-string($codepoint)
};
(:~
 : Joins the given strings into one string, putting "@" between neighbouring
 : items.
 : NOTE(review): despite the function name the separator is "@", presumably a
 : placeholder that a later processing step turns into a space; confirm.
 :
 : @param $characters the strings to join
 : @return the joined string; the empty string for an empty sequence
 :)
declare function pre:add-spaces-between-chars($characters as xs:string*) as xs:string {
    let $separator := '@'
    return string-join($characters, $separator)
};