Skip to content
Snippets Groups Projects
Unverified Commit 0ba4a38d authored by uwe's avatar uwe Committed by GitHub
Browse files

Merge pull request #12 from MRodz/master

added further preprocessing for whitespaces
parents a69dfd44 c1d871df
No related branches found
No related tags found
No related merge requests found
xquery version "3.0"; xquery version "3.0";
(:~ (:~
: MARKERSET Module ("markerset", "http://bdn.edition.de/intermediate_format/markerset") : MARKERSET Module ("markerset", "http://bdn.edition.de/intermediate_format/markerset")
: ******************************************************************************************* : *******************************************************************************************
: This module is a helper module and defines functions to collect and construct reading markers : This module is a helper module and defines functions to collect and construct reading markers
...@@ -14,32 +14,32 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; ...@@ -14,32 +14,32 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:) (:############################# Modules Functions #############################:)
(:~ (:~
: markerset:collect-markers() : markerset:collect-markers()
: This function collects markers for a given reading. : This function collects markers for a given reading.
: It distinguishes tei:lem and tei:rdg. In case of tei:lem it collects all following-sibling tei:rdgs. In case of tei:rdg it collects the reading itself. : It distinguishes tei:lem and tei:rdg. In case of tei:lem it collects all following-sibling tei:rdgs. In case of tei:rdg it collects the reading itself.
: :
: @param $reading the reading node to collect readings for : @param $reading the reading node to collect readings for
: @return node() representing a markerset of readings for the given node : @return node() representing a markerset of readings for the given node
: :
: @version 2.0 (2018-01-29) : @version 2.0 (2018-01-29)
: @status working : @status working
: @author Uwe Sikora : @author Uwe Sikora
:) :)
declare function markerset:collect-markers declare function markerset:collect-markers
( $reading as node()* ) as item() { ( $reading as node()* ) as item() {
let $markers := ( let $markers := (
if ($reading[self::lem]) then ( if ($reading[self::lem]) then (
attribute {"count"}{count($reading/following-sibling::rdg)}, attribute {"count"}{count($reading/following-sibling::rdg)},
for $sibling in $reading/following-sibling::rdg for $sibling in $reading/following-sibling::rdg
return( return(
element {name($sibling)} { element {name($sibling)} {
$sibling/@*, $sibling/@*,
attribute {"context"}{"lem"} attribute {"context"}{"lem"}
} }
) )
) )
else if ($reading[self::rdg]) then ( else if ($reading[self::rdg]) then (
element {name($reading)} { element {name($reading)} {
$reading/@*, $reading/@*,
...@@ -56,24 +56,24 @@ declare function markerset:collect-markers ...@@ -56,24 +56,24 @@ declare function markerset:collect-markers
}; };
(:~ (:~
: markerset:merge-markers() : markerset:merge-markers()
: This function merges markers in a given set by the same type. It orders the merged markers according to an explicit ordering. : This function merges markers in a given set by the same type. It orders the merged markers according to an explicit ordering.
: :
: @param $markerset node() including the markers that should be merged : @param $markerset node() including the markers that should be merged
: @return node()* representing the merged markerset : @return node()* representing the merged markerset
: :
: @version 2.0 (2018-01-29) : @version 2.0 (2018-01-29)
: @status working : @status working
: @author Uwe Sikora : @author Uwe Sikora
:) :)
declare function markerset:merge-markers declare function markerset:merge-markers
( $markerset as node()* ) as item()* { ( $markerset as node()* ) as item()* {
let $order := ("om","ppl", "ptl", "pp", "pt" , "v") let $order := ("om","ppl", "ptl", "pp", "pt" , "v")
let $reading-types := distinct-values( $markerset[self::rdg or self::lem]/string(@type) ) let $reading-types := distinct-values( $markerset[self::rdg or self::lem]/string(@type) )
return ( return (
attribute {"order"}{distinct-values( ($order, $reading-types) ) }, attribute {"order"}{distinct-values( ($order, $reading-types) ) },
for $type in distinct-values( ($order, $reading-types) ) for $type in distinct-values( ($order, $reading-types) )
let $rdgs := $markerset[@type = $type] let $rdgs := $markerset[@type = $type]
...@@ -86,14 +86,14 @@ declare function markerset:merge-markers ...@@ -86,14 +86,14 @@ declare function markerset:merge-markers
attribute type {$type} attribute type {$type}
} }
) else () ) else ()
) )
}; };
(:~ (:~
: markerset:marker() : markerset:marker()
: Constructor function which creates the marker element with name, mark-type and references : Constructor function which creates the marker element with name, mark-type and references
: :
: @param $name The name of the marker element : @param $name The name of the marker element
: @param $mark The mark type e.g. open or close : @param $mark The mark type e.g. open or close
...@@ -105,19 +105,20 @@ declare function markerset:merge-markers ...@@ -105,19 +105,20 @@ declare function markerset:merge-markers
:) :)
declare function markerset:marker
    ($name as xs:string, $type as xs:string, $reading as node()) as element()? {
    (: Builds a single marker element named $name for the reading $reading.
       $type is written to the marker's @mark attribute.

       An opening marker ($type = 'open') for a reading whose @type is 'v' and
       whose @context is 'rdg' is suppressed: the empty sequence is returned.
       The declared return type is therefore element()? -- with a plain
       element() the suppressed branch would raise a type error. :)
    if ($type = 'open' and data($reading/@type) = 'v' and $reading/@context = 'rdg')
    then ()
    else (
        element {$name} {
            (: every '#' is stripped from the witness reference(s) :)
            attribute wit { replace(data($reading/@wit), '#', '') },
            attribute type { data($reading/@type) },
            attribute ref { data($reading/@id) },
            attribute mark { $type },
            attribute context { $reading/@context }
        }
    )
};
(:~ (:~
: markerset:construct-marker-from-markerset : markerset:construct-marker-from-markerset
: Helping function to construct markers for a sequence of markersets : Helping function to construct markers for a sequence of markersets
: :
...@@ -131,9 +132,9 @@ declare function markerset:marker ...@@ -131,9 +132,9 @@ declare function markerset:marker
:) :)
declare function markerset:construct-marker-from-markerset
    ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* {
    (: Calls markerset:marker() once for every child node of the nodes in
       $marker-set, passing $name and $marker-type through unchanged, and
       returns the results in the order the child nodes are selected. :)
    $marker-set/node() ! markerset:marker($name, $marker-type, .)
};
\ No newline at end of file
xquery version "3.0"; xquery version "3.0";
(:~ (:~
: PREPROCESSING Module ("pre", "http://bdn.edition.de/intermediate_format/preprocessing") : PREPROCESSING Module ("pre", "http://bdn.edition.de/intermediate_format/preprocessing")
: ******************************************************************************************* : *******************************************************************************************
: This module contains the preprocessing routines for the intermediate format : This module contains the preprocessing routines for the intermediate format
: :
: It imports the whitespace handling helper module to perform whitespace handling during the preprocessing : It imports the whitespace handling helper module to perform whitespace handling during the preprocessing
: @version 2.0 (2018-01-29) : @version 2.0 (2018-01-29)
: @status working : @status working
: @author Uwe Sikora : @author Uwe Sikora
...@@ -18,20 +18,20 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; ...@@ -18,20 +18,20 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:) (:############################# Modules Functions #############################:)
(:~ (:~
: pre:preprocessing-textNode : pre:preprocessing-textNode
: Preprocessing function which converts each text() into an XML node "textNode". This function is an experimental fallback solution and not the main preprocessing routine! : Preprocessing function which converts each text() into an XML node "textNode". This function is an experimental fallback solution and not the main preprocessing routine!
: :
: @param $nodes the nodes to be converted : @param $nodes the nodes to be converted
: @return item()* representing the converted node : @return item()* representing the converted node
: :
: @version 1.2 (2017-10-15) : @version 1.2 (2017-10-15)
: @status working : @status working
: @author Uwe Sikora : @author Uwe Sikora
:) :)
declare function pre:preprocessing-textNode declare function pre:preprocessing-textNode
($nodes as node()*) as item()* { ($nodes as node()*) as item()* {
for $node in $nodes for $node in $nodes
return return
typeswitch($node) typeswitch($node)
...@@ -44,7 +44,7 @@ declare function pre:preprocessing-textNode ...@@ -44,7 +44,7 @@ declare function pre:preprocessing-textNode
} }
) )
) )
case element(TEI) return ( case element(TEI) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -54,7 +54,7 @@ declare function pre:preprocessing-textNode ...@@ -54,7 +54,7 @@ declare function pre:preprocessing-textNode
} }
} }
) )
case element(lem) return ( case element(lem) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -62,7 +62,7 @@ declare function pre:preprocessing-textNode ...@@ -62,7 +62,7 @@ declare function pre:preprocessing-textNode
pre:preprocessing-textNode($node/node()) pre:preprocessing-textNode($node/node())
} }
) )
case element(rdg) return ( case element(rdg) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -70,7 +70,7 @@ declare function pre:preprocessing-textNode ...@@ -70,7 +70,7 @@ declare function pre:preprocessing-textNode
pre:preprocessing-textNode($node/node()) pre:preprocessing-textNode($node/node())
} }
) )
case element(note) return ( case element(note) return (
if ($node[@type eq "editorial"]) then ( if ($node[@type eq "editorial"]) then (
) else ( ) else (
...@@ -80,8 +80,8 @@ declare function pre:preprocessing-textNode ...@@ -80,8 +80,8 @@ declare function pre:preprocessing-textNode
} }
) )
) )
default return ( default return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
pre:preprocessing-textNode($node/node()) pre:preprocessing-textNode($node/node())
...@@ -90,7 +90,7 @@ declare function pre:preprocessing-textNode ...@@ -90,7 +90,7 @@ declare function pre:preprocessing-textNode
}; };
(:~ (:~
: pre:default-element : pre:default-element
: Function that serves as the default element constructor for the preprocessing conversion. : Function that serves as the default element constructor for the preprocessing conversion.
: It is more or less a copy function: it copies the element's name and attributes and recursively hands the conversion on to its child nodes : It is more or less a copy function: it copies the element's name and attributes and recursively hands the conversion on to its child nodes
...@@ -98,7 +98,7 @@ declare function pre:preprocessing-textNode ...@@ -98,7 +98,7 @@ declare function pre:preprocessing-textNode
: @param $node the node to be copied : @param $node the node to be copied
: @param $recursive-function the recursive function as some kind of call back to the main conversion : @param $recursive-function the recursive function as some kind of call back to the main conversion
: @return item()* representing the converted node : @return item()* representing the converted node
: :
: @version 1.0 (2018-01-31) : @version 1.0 (2018-01-31)
: @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO) : @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO)
: @status working : @status working
...@@ -106,39 +106,49 @@ declare function pre:preprocessing-textNode ...@@ -106,39 +106,49 @@ declare function pre:preprocessing-textNode
:) :)
declare function pre:default-element
    ( $node as node(), $recursive-function as node()* ) as item()* {
    (: Copies $node as an element with the same name and attributes and uses
       $recursive-function (the already converted children) as its content.

       A break-after="yes" attribute is added when all of the following hold:
         - the node immediately following $node has a non-empty string value
           consisting only of whitespace,
         - the next sibling element is one of ref, app, hi, bibl, foreign,
           choice, milestone, persName, index or seg,
         - $node itself is not an index element. :)
    let $next-node := $node/following-sibling::node()[1]
    let $next-element := $node/following-sibling::*[1]
    let $blank-follows :=
        exists($next-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""])
    let $inline-follows :=
        exists(
            $next-element/(
                self::ref | self::app | self::hi | self::bibl | self::foreign
                | self::choice | self::milestone | self::persName | self::index | self::seg
            )
        )
    let $needs-break := $blank-follows and $inline-follows and empty($node/self::index)
    return
        element { $node/name() } {
            $node/@*,
            if ($needs-break) then attribute break-after { "yes" } else (),
            $recursive-function
        }
};
(:~ (:~
: pre:preprocessing : pre:preprocessing
: main preprocessing function. : main preprocessing function.
: :
: @param $nodes the nodes to be converted : @param $nodes the nodes to be converted
: @return item()* representing the converted node : @return item()* representing the converted node
: :
: @version 2.0 (2018-02-01) : @version 2.0 (2018-02-01)
: @status working : @status working
: @author Uwe Sikora : @author Uwe Sikora
:) :)
declare function pre:preprocessing declare function pre:preprocessing
($nodes as node()*) as item()* { ($nodes as node()*) as item()* {
for $node in $nodes for $node in $nodes
return return
typeswitch($node) typeswitch($node)
case processing-instruction() return () case processing-instruction() return ()
case text() return ( case text() return (
whitespace:text($node, " ") whitespace:text($node, " ")
) )
case comment() return () case comment() return ()
case element(TEI) return ( case element(TEI) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -150,19 +160,19 @@ declare function pre:preprocessing ...@@ -150,19 +160,19 @@ declare function pre:preprocessing
} }
} }
) )
case element(teiHeader) return ( $node ) case element(teiHeader) return ( $node )
case element(div) return ( case element(div) return (
if ($node[@type = 'section-group']) then ( if ($node[@type = 'section-group']) then (
pre:preprocessing($node/node()) pre:preprocessing($node/node())
) )
else ( else (
pre:default-element( $node, pre:preprocessing($node/node()) ) pre:default-element( $node, pre:preprocessing($node/node()) )
) )
) )
case element(lem) return ( case element(lem) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -170,7 +180,7 @@ declare function pre:preprocessing ...@@ -170,7 +180,7 @@ declare function pre:preprocessing
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
case element(rdg) return ( case element(rdg) return (
element{$node/name()}{ element{$node/name()}{
$node/@*, $node/@*,
...@@ -178,87 +188,84 @@ declare function pre:preprocessing ...@@ -178,87 +188,84 @@ declare function pre:preprocessing
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
case element(note) return ( case element(note) return (
if ( $node[@type != "editorial"] or $node[ not(@type) ] ) then ( if ( $node[@type != "editorial"] or $node[ not(@type) ] ) then (
pre:default-element( $node, pre:preprocessing($node/node()) ) pre:default-element( $node, pre:preprocessing($node/node()) )
) else ( ) ) else ( )
) )
case element(pb) return ( case element(pb) return (
let $preceding-sibling := $node/preceding-sibling::node()[1] let $preceding-sibling := $node/preceding-sibling::node()[1]
let $following-sibling := $node/following-sibling::node()[1] let $following-sibling := $node/following-sibling::node()[1]
let $first := $node = $node/parent::node()/node()[not(self::text() and normalize-space(self::node()) = '')][1]
let $ignore := ("docAuthor", "app", "index", "seg", "bibl")
return return
element {$node/name()}{ element {$node/name()}{
$node/@*, $node/@*,
if ( if (
( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() ) ( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() )
and and
( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() ) ( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() )
) then ( attribute {"break"}{"no"} ) ) then ( attribute {"break"}{"no"} )
else if (
(:else if ( ( $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
( $preceeding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceeding-sibling, " ") = true() )
and
( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = true() )
) then ( attribute {"clear"}{"left"} ) :)
else if ( $following-sibling[self::docAuthor or self::app or self::index or self::seg or self::bibl] ) then (
attribute {"break"}{"yes"}
)
else if (
( $preceding-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
and and
( $following-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] ) ( $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
) then ( ) then (
attribute {"break"}{"yes"} attribute {"break-before"}{"yes"},
attribute {"break-after"}{"yes"}
)
else if (
$preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-before"}{"yes"}
)
else if (
$following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-after"}{"yes"}
) )
else ( ) else ( )
} }
) )
case element(hi) return ( case element(hi) return (
if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then( if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then(
element {'aligned'} { element {'aligned'} {
$node/@*, $node/@*,
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
else ( else (
pre:default-element( $node, pre:preprocessing($node/node()) ) pre:default-element( $node, pre:preprocessing($node/node()) )
) )
) )
case element(seg) return ( case element(seg) return (
if($node[@type = 'item']) then( if($node[@type = 'item']) then(
element {'item'} { element {'item'} {
$node/@*[name() != 'type'], $node/@*[name() != 'type'],
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
else if($node[@type = 'head']) then( else if($node[@type = 'head']) then(
element {'head'} { element {'head'} {
$node/@*[name() != 'type'], $node/@*[name() != 'type'],
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
else if($node[@type = 'row']) then( else if($node[@type = 'row']) then(
element {'row'} { element {'row'} {
$node/@*[name() != 'type'], $node/@*[name() != 'type'],
pre:preprocessing($node/node()) pre:preprocessing($node/node())
} }
) )
else ( else (
pre:default-element( $node, pre:preprocessing($node/node()) ) pre:default-element( $node, pre:preprocessing($node/node()) )
) )
) )
default return ( default return (
pre:default-element( $node, pre:preprocessing($node/node()) ) pre:default-element( $node, pre:preprocessing($node/node()) )
) )
}; };
\ No newline at end of file
xquery version "3.0"; xquery version "3.0";
(:~ (:~
: WHITESPACE Module ("whitespace", "http://bdn.edition.de/intermediate_format/whitespace_handling") : WHITESPACE Module ("whitespace", "http://bdn.edition.de/intermediate_format/whitespace_handling")
: ******************************************************************************************* : *******************************************************************************************
: This module contains the functions to handle different whitespace operations on text : This module contains the functions to handle different whitespace operations on text
...@@ -9,12 +9,14 @@ xquery version "3.0"; ...@@ -9,12 +9,14 @@ xquery version "3.0";
: @author Uwe Sikora : @author Uwe Sikora
:) :)
module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling"; module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling";
import module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing" at "preprocessing.xqm";
declare default element namespace "http://www.tei-c.org/ns/1.0"; declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:) (:############################# Modules Functions #############################:)
(:~ (:~
: whitespace:text() : whitespace:text()
: This function handles whitespace in defined text() nodes : This function handles whitespace in defined text() nodes
: :
...@@ -28,26 +30,26 @@ declare default element namespace "http://www.tei-c.org/ns/1.0"; ...@@ -28,26 +30,26 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
:) :)
declare function whitespace:text
    ( $text as text()*, $escape-char as xs:string? ) as text()* {
    (: Filters and escapes text nodes during preprocessing.

       Text whose string value is non-empty and consists only of whitespace is
       dropped (empty sequence), unless it is exactly one space character.
       Everything else is handed to whitespace:escape-text().

       NOTE(review): $escape-char is only tested for its effective boolean
       value (absent or "" is false). The character actually passed on is the
       literal "@", not the value of $escape-char. Confirm whether the
       parameter should be forwarded instead; behaviour is kept as is.

       The former unused $normalized binding was removed: it called
       normalize-space() on the whole text()* sequence, which is a type error
       for more than one item. :)
    let $whitespace-node := $text[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
    let $single-whitespace-between-nodes := $text = ' '
    return
        if ( not($whitespace-node) or $single-whitespace-between-nodes ) then (
            if ($escape-char) then (
                whitespace:escape-text($text, "@")
            ) else ( whitespace:escape-text($text, " ") )
        )
        else ()
};
(:~ (:~
: whitespace:escape-text() : whitespace:escape-text()
: This function replaces whitespaces in a text() : This function replaces whitespaces in a text()
: with a defined preservation character : with a defined preservation character
: :
: @param $text the text-node to be converted : @param $text the text-node to be converted
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment