diff --git a/modules/intermediate_format/markerset.xqm b/modules/intermediate_format/markerset.xqm index a25714693780e436ea5098b295df9adb082001d3..bec33898ef8891337c33f4ffca29571b3087499e 100644 --- a/modules/intermediate_format/markerset.xqm +++ b/modules/intermediate_format/markerset.xqm @@ -1,5 +1,5 @@ xquery version "3.0"; -(:~ +(:~ : MARKERSET Module ("markerset", "http://bdn.edition.de/intermediate_format/markerset") : ******************************************************************************************* : This module is a helper module and defines functions to collect and construct reading markers @@ -14,32 +14,32 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; (:############################# Modules Functions #############################:) -(:~ +(:~ : markerset:collect-markers() : This function collect markers for a given reading. : It destinguishes tei:lem and tei:rdg. In case of tei:lem it collects all sibling tei:rdgs. In case of tei:rdg it collect itself. : : @param $reading the reading node to collect readings for : @return node() representing a markerset of readings for the given node - : + : : @version 2.0 (2018-01-29) : @status working : @author Uwe Sikora :) declare function markerset:collect-markers ( $reading as node()* ) as item() { - + let $markers := ( if ($reading[self::lem]) then ( attribute {"count"}{count($reading/following-sibling::rdg)}, for $sibling in $reading/following-sibling::rdg return( element {name($sibling)} { - $sibling/@*, + $sibling/@*, attribute {"context"}{"lem"} } ) - ) + ) else if ($reading[self::rdg]) then ( element {name($reading)} { $reading/@*, @@ -56,24 +56,24 @@ declare function markerset:collect-markers }; -(:~ +(:~ : markerset:merge-markers() : This function merges markers in a given set by the same type. It orders the merged markers according to an explicit ordering. : : @param $markerset node() including the markers that should be merged : @return node()* representing the merged markerset - : + : : @version 2.0 (2018-01-29) : @status working : @author Uwe Sikora :) declare function markerset:merge-markers ( $markerset as node()* ) as item()* { - + let $order := ("om","ppl", "ptl", "pp", "pt" , "v") let $reading-types := distinct-values( $markerset[self::rdg or self::lem]/string(@type) ) - - return ( + + return ( attribute {"order"}{distinct-values( ($order, $reading-types) ) }, for $type in distinct-values( ($order, $reading-types) ) let $rdgs := $markerset[@type = $type] @@ -86,14 +86,14 @@ declare function markerset:merge-markers attribute type {$type} } ) else () - + ) }; -(:~ +(:~ : markerset:marker() - : Constructor function which creates the marker element with name, mark-type and references + : Constructor function which creates the marker element with name, mark-type and references : : @param $name The name of the marker element : @param $mark The mark type e.g. open or close @@ -105,19 +105,20 @@ declare function markerset:merge-markers :) declare function markerset:marker ($name as xs:string, $type as xs:string, $reading as node()) as element(){ - - element {$name} { + if($type = 'open' and data($reading/@type) = 'v' and $reading/@context = 'rdg') + then () + else (element {$name} { (:attribute bdnp_parent {$node/parent::node()/name()}, :) attribute wit { replace(data($reading/@wit), '#', '') }, attribute type { data($reading/@type) }, attribute ref { data($reading/@id) }, attribute mark { $type }, attribute context { $reading/@context } - } + }) }; -(:~ +(:~ : markerset:construct-marker-from-markerset : Helping function to construct markers for a sequence of markersets : @@ -131,9 +132,9 @@ declare function markerset:marker :) declare function markerset:construct-marker-from-markerset ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* { - + for $marker in $marker-set/node() return ( markerset:marker($name, $marker-type, $marker) ) -}; \ No newline at end of file +}; diff --git a/modules/intermediate_format/preprocessing.xqm b/modules/intermediate_format/preprocessing.xqm index f9ddaf00b9e41e2f2f9df47bd9eb737caf49fd5a..061e16c723bf0af48b95276104252204e8d54a87 100644 --- a/modules/intermediate_format/preprocessing.xqm +++ b/modules/intermediate_format/preprocessing.xqm @@ -1,11 +1,11 @@ xquery version "3.0"; -(:~ +(:~ : PREPROCESSING Module ("pre", "http://bdn.edition.de/intermediate_format/preprocessing") : ******************************************************************************************* : This module contains the preprocessing routines for the intermediate format : : It imports the whitespace handling helper module to make some whitespace handling duricng the preprocessing - + : @version 2.0 (2018-01-29) : @status working : @author Uwe Sikora @@ -18,20 +18,20 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; (:############################# Modules Functions #############################:) -(:~ +(:~ : pre:preprocessing-textNode : preprocessing function which converts each text() into a xml-node "textNode". This function is a experimental fall back solution and not the main preprocessing routine! : : @param $nodes the nodes to be converted : @return item()* representing the converted node - : + : : @version 1.2 (2017-10-15) : @status working : @author Uwe Sikora :) declare function pre:preprocessing-textNode ($nodes as node()*) as item()* { - + for $node in $nodes return typeswitch($node) @@ -44,7 +44,7 @@ declare function pre:preprocessing-textNode } ) ) - + case element(TEI) return ( element{$node/name()}{ $node/@*, @@ -54,7 +54,7 @@ declare function pre:preprocessing-textNode } } ) - + case element(lem) return ( element{$node/name()}{ $node/@*, @@ -62,7 +62,7 @@ declare function pre:preprocessing-textNode pre:preprocessing-textNode($node/node()) } ) - + case element(rdg) return ( element{$node/name()}{ $node/@*, @@ -70,7 +70,7 @@ declare function pre:preprocessing-textNode pre:preprocessing-textNode($node/node()) } ) - + case element(note) return ( if ($node[@type eq "editorial"]) then ( ) else ( @@ -80,8 +80,8 @@ declare function pre:preprocessing-textNode } ) ) - - default return ( + + default return ( element{$node/name()}{ $node/@*, pre:preprocessing-textNode($node/node()) @@ -90,7 +90,7 @@ declare function pre:preprocessing-textNode }; -(:~ +(:~ : pre:pre:default-element : function that suites as default element constructor for the preproseccing conversion. : It is more or less a copy function, copying the elements name and its node and recurively leeds the conversion to its child-nodes @@ -98,7 +98,7 @@ declare function pre:preprocessing-textNode : @param $node the node to be copied : @param $recursive-function the recursive function as some kind of call back to the main conversion : @return item()* representing the converted node - : + : : @version 1.0 (2018-01-31) : @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO) : @status working @@ -106,39 +106,49 @@ declare function pre:preprocessing-textNode :) declare function pre:default-element ( $node as node(), $recursive-function as node()* ) as item()* { - - element{$node/name()}{ - $node/@*, - $recursive-function - } + let $following-node := $node/following-sibling::node()[1] + let $following-sibling := $node/following-sibling::*[1] + return + element{$node/name()}{ + $node/@*, + (if($following-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] + and $following-sibling[self::ref or self::app or self::hi or self::bibl + or self::foreign or self::choice or self::milestone or self::persName + or self::choice or self::index or self::seg] + and not($node[self::index])) + then + attribute {"break-after"}{"yes"} + else ()), + $recursive-function + } }; -(:~ +(:~ : pre:preprocessing : main preprocessing function. : : @param $nodes the nodes to be converted : @return item()* representing the converted node - : + : : @version 2.0 (2018-02-01) : @status working : @author Uwe Sikora :) declare function pre:preprocessing ($nodes as node()*) as item()* { - + for $node in $nodes return typeswitch($node) case processing-instruction() return () - + case text() return ( whitespace:text($node, " ") ) - + case comment() return () - + case element(TEI) return ( element{$node/name()}{ $node/@*, @@ -150,19 +160,19 @@ declare function pre:preprocessing } } ) - + case element(teiHeader) return ( $node ) - + case element(div) return ( if ($node[@type = 'section-group']) then ( pre:preprocessing($node/node()) - ) + ) else ( pre:default-element( $node, pre:preprocessing($node/node()) ) ) - + ) - + case element(lem) return ( element{$node/name()}{ $node/@*, @@ -170,7 +180,7 @@ declare function pre:preprocessing pre:preprocessing($node/node()) } ) - + case element(rdg) return ( element{$node/name()}{ $node/@*, @@ -178,87 +188,84 @@ declare function pre:preprocessing pre:preprocessing($node/node()) } ) - + case element(note) return ( if ( $node[@type != "editorial"] or $node[ not(@type) ] ) then ( pre:default-element( $node, pre:preprocessing($node/node()) ) ) else ( ) ) - + case element(pb) return ( let $preceding-sibling := $node/preceding-sibling::node()[1] let $following-sibling := $node/following-sibling::node()[1] - let $first := $node = $node/parent::node()/node()[not(self::text() and normalize-space(self::node()) = '')][1] - let $ignore := ("docAuthor", "app", "index", "seg", "bibl") return element {$node/name()}{ $node/@*, - - if ( + + if ( ( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() ) and ( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() ) - ) then ( attribute {"break"}{"no"} ) - - (:else if ( - ( $preceeding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceeding-sibling, " ") = true() ) - and - ( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = true() ) - ) then ( attribute {"clear"}{"left"} ) :) - - else if ( $following-sibling[self::docAuthor or self::app or self::index or self::seg or self::bibl] ) then ( - attribute {"break"}{"yes"} - ) - - else if ( - ( $preceding-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] ) + ) then ( attribute {"break"}{"no"} ) + else if ( + ( $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] ) and - ( $following-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] ) + ( $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] ) ) then ( - attribute {"break"}{"yes"} + attribute {"break-before"}{"yes"}, + attribute {"break-after"}{"yes"} + ) + else if ( + $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] + ) then ( + attribute {"break-before"}{"yes"} + ) + else if ( + $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] + ) then ( + attribute {"break-after"}{"yes"} ) - else ( ) } ) - + case element(hi) return ( - if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then( + if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then( element {'aligned'} { $node/@*, pre:preprocessing($node/node()) - } + } ) else ( pre:default-element( $node, pre:preprocessing($node/node()) ) ) ) - + case element(seg) return ( if($node[@type = 'item']) then( element {'item'} { $node/@*[name() != 'type'], pre:preprocessing($node/node()) - } + } ) else if($node[@type = 'head']) then( element {'head'} { $node/@*[name() != 'type'], pre:preprocessing($node/node()) - } + } ) else if($node[@type = 'row']) then( element {'row'} { $node/@*[name() != 'type'], pre:preprocessing($node/node()) - } + } ) else ( pre:default-element( $node, pre:preprocessing($node/node()) ) ) ) - - default return ( + + default return ( pre:default-element( $node, pre:preprocessing($node/node()) ) ) -}; \ No newline at end of file +}; diff --git a/modules/intermediate_format/whitespace-handling.xqm b/modules/intermediate_format/whitespace-handling.xqm index dd7bee3d553dcdad37e57aada1cfd5ab5eea715c..4e797d5772be53852413093f2c18d048e88d1b1e 100644 --- a/modules/intermediate_format/whitespace-handling.xqm +++ b/modules/intermediate_format/whitespace-handling.xqm @@ -1,5 +1,5 @@ xquery version "3.0"; -(:~ +(:~ : WHITESPACE Module ("whitespace", "http://bdn.edition.de/intermediate_format/whitespace_handling") : ******************************************************************************************* : This module contains the functions to handle different whitespace operations on text @@ -9,12 +9,14 @@ xquery version "3.0"; : @author Uwe Sikora :) module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling"; +import module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing" at "preprocessing.xqm"; + declare default element namespace "http://www.tei-c.org/ns/1.0"; (:############################# Modules Functions #############################:) -(:~ +(:~ : whitespace:text() : This function handles whitespace in defined text() nodes : @@ -28,26 +30,26 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; :) declare function whitespace:text ( $text as text()*, $escape-char as xs:string? ) as text()* { - + let $normalized := normalize-space($text) let $whitespace-node := $text[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] let $single-whitespace-between-nodes := $text = ' ' - return + return if ( not($whitespace-node) or $single-whitespace-between-nodes) then ( - + if ($escape-char) then ( - whitespace:escape-text($text, "#") + whitespace:escape-text($text, "@") ) else ( whitespace:escape-text($text, " ") ) - - ) + + ) else () - + }; -(:~ +(:~ : whitespace:escape-text() - : This function replaces whitespaces in a text() + : This function replaces whitespaces in a text() : with a defined preservation character : : @param $text the text-node to be converted