Skip to content
Snippets Groups Projects
Unverified Commit 0ba4a38d authored by uwe's avatar uwe Committed by GitHub
Browse files

Merge pull request #12 from MRodz/master

added further preprocessing for whitespaces
parents a69dfd44 c1d871df
No related branches found
No related tags found
No related merge requests found
xquery version "3.0";
(:~
(:~
: MARKERSET Module ("markerset", "http://bdn.edition.de/intermediate_format/markerset")
: *******************************************************************************************
: This module is a helper module and defines functions to collect and construct reading markers
......@@ -14,32 +14,32 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:)
(:~
(:~
: markerset:collect-markers()
: This function collects markers for a given reading.
: It distinguishes tei:lem and tei:rdg. In case of tei:lem it collects all following sibling tei:rdgs. In case of tei:rdg it collects the reading itself.
:
: @param $reading the reading node to collect readings for
: @return node() representing a markerset of readings for the given node
:
:
: @version 2.0 (2018-01-29)
: @status working
: @author Uwe Sikora
:)
declare function markerset:collect-markers
( $reading as node()* ) as item() {
let $markers := (
if ($reading[self::lem]) then (
attribute {"count"}{count($reading/following-sibling::rdg)},
for $sibling in $reading/following-sibling::rdg
return(
element {name($sibling)} {
$sibling/@*,
$sibling/@*,
attribute {"context"}{"lem"}
}
)
)
)
else if ($reading[self::rdg]) then (
element {name($reading)} {
$reading/@*,
......@@ -56,24 +56,24 @@ declare function markerset:collect-markers
};
(:~
(:~
: markerset:merge-markers()
: This function merges markers in a given set by the same type. It orders the merged markers according to an explicit ordering.
:
: @param $markerset node() including the markers that should be merged
: @return node()* representing the merged markerset
:
:
: @version 2.0 (2018-01-29)
: @status working
: @author Uwe Sikora
:)
declare function markerset:merge-markers
( $markerset as node()* ) as item()* {
let $order := ("om","ppl", "ptl", "pp", "pt" , "v")
let $reading-types := distinct-values( $markerset[self::rdg or self::lem]/string(@type) )
return (
return (
attribute {"order"}{distinct-values( ($order, $reading-types) ) },
for $type in distinct-values( ($order, $reading-types) )
let $rdgs := $markerset[@type = $type]
......@@ -86,14 +86,14 @@ declare function markerset:merge-markers
attribute type {$type}
}
) else ()
)
};
(:~
(:~
: markerset:marker()
: Constructor function which creates the marker element with name, mark-type and references
: Constructor function which creates the marker element with name, mark-type and references
:
: @param $name The name of the marker element
: @param $mark The mark type e.g. open or close
......@@ -105,19 +105,20 @@ declare function markerset:merge-markers
:)
(: Builds one marker element named $name for the given reading.
 :
 : $name    : element name of the marker to construct
 : $type    : mark type, e.g. "open" or "close"; written to @mark
 : $reading : reading whose @wit, @type, @id and @context are copied
 :            onto the marker ('#' characters are removed from @wit)
 :
 : Returns the marker element, or the empty sequence for an "open" mark on a
 : reading with @type = 'v' and @context = 'rdg'. The return type is therefore
 : element()? rather than element(), which would raise a type error whenever
 : that suppression branch is taken.
 :)
declare function markerset:marker
    ($name as xs:string, $type as xs:string, $reading as node()) as element()? {
    if ($type = 'open' and data($reading/@type) = 'v' and $reading/@context = 'rdg')
    then ()
    else
        element {$name} {
            attribute wit { replace(data($reading/@wit), '#', '') },
            attribute type { data($reading/@type) },
            attribute ref { data($reading/@id) },
            attribute mark { $type },
            attribute context { $reading/@context }
        }
};
(:~
(:~
: markerset:construct-marker-from-markerset
: Helping function to construct markers for a sequence of markersets
:
......@@ -131,9 +132,9 @@ declare function markerset:marker
:)
(: Maps every child node of the given markerset(s) to a marker built by
 : markerset:marker(), passing $name and $marker-type through unchanged.
 : The results are returned in the order of the child nodes.
 :)
declare function markerset:construct-marker-from-markerset
    ( $name as xs:string, $marker-type as xs:string, $marker-set as node()* ) as item()* {
    $marker-set/node() ! markerset:marker($name, $marker-type, .)
};
\ No newline at end of file
};
xquery version "3.0";
(:~
(:~
: PREPROCESSING Module ("pre", "http://bdn.edition.de/intermediate_format/preprocessing")
: *******************************************************************************************
: This module contains the preprocessing routines for the intermediate format
:
: It imports the whitespace handling helper module to perform whitespace handling during the preprocessing
: @version 2.0 (2018-01-29)
: @status working
: @author Uwe Sikora
......@@ -18,20 +18,20 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:)
(:~
(:~
: pre:preprocessing-textNode
: preprocessing function which converts each text() into an XML node "textNode". This function is an experimental fallback solution and not the main preprocessing routine!
:
: @param $nodes the nodes to be converted
: @return item()* representing the converted node
:
:
: @version 1.2 (2017-10-15)
: @status working
: @author Uwe Sikora
:)
declare function pre:preprocessing-textNode
($nodes as node()*) as item()* {
for $node in $nodes
return
typeswitch($node)
......@@ -44,7 +44,7 @@ declare function pre:preprocessing-textNode
}
)
)
case element(TEI) return (
element{$node/name()}{
$node/@*,
......@@ -54,7 +54,7 @@ declare function pre:preprocessing-textNode
}
}
)
case element(lem) return (
element{$node/name()}{
$node/@*,
......@@ -62,7 +62,7 @@ declare function pre:preprocessing-textNode
pre:preprocessing-textNode($node/node())
}
)
case element(rdg) return (
element{$node/name()}{
$node/@*,
......@@ -70,7 +70,7 @@ declare function pre:preprocessing-textNode
pre:preprocessing-textNode($node/node())
}
)
case element(note) return (
if ($node[@type eq "editorial"]) then (
) else (
......@@ -80,8 +80,8 @@ declare function pre:preprocessing-textNode
}
)
)
default return (
default return (
element{$node/name()}{
$node/@*,
pre:preprocessing-textNode($node/node())
......@@ -90,7 +90,7 @@ declare function pre:preprocessing-textNode
};
(:~
(:~
: pre:default-element
: function that serves as the default element constructor for the preprocessing conversion.
: It is more or less a copy function, copying the element's name and attributes and recursively handing the conversion on to its child nodes
......@@ -98,7 +98,7 @@ declare function pre:preprocessing-textNode
: @param $node the node to be copied
: @param $recursive-function the recursive function as some kind of call back to the main conversion
: @return item()* representing the converted node
:
:
: @version 1.0 (2018-01-31)
: @note Would be great if $recursive-function would be a real function and not a node-sequence (TO-DO)
: @status working
......@@ -106,39 +106,49 @@ declare function pre:preprocessing-textNode
:)
(: Default element constructor of the preprocessing conversion.
 :
 : Copies the name and attributes of $node and appends $recursive-function
 : (the already converted content) as the new element's content.
 :
 : A break-after="yes" attribute is added when all of the following hold:
 :   - the node directly following $node has a whitespace-only string value,
 :   - the next following sibling element is one of ref, app, hi, bibl,
 :     foreign, choice, milestone, persName, index or seg,
 :   - $node itself is not an index element.
 :
 : $node               : the element to be copied
 : $recursive-function : converted child nodes to place inside the copy
 :)
declare function pre:default-element
    ( $node as node(), $recursive-function as node()* ) as item()* {
    let $following-node := $node/following-sibling::node()[1]
    let $following-sibling := $node/following-sibling::*[1]
    (: true when only whitespace separates $node from whatever follows it :)
    let $whitespace-follows :=
        exists($following-node[matches(., "[\s\n\r\t]") and normalize-space(.) = ""])
    (: true when the next element is one of the listed element types :)
    let $listed-element-follows :=
        exists($following-sibling[self::ref or self::app or self::hi or self::bibl
            or self::foreign or self::choice or self::milestone or self::persName
            or self::index or self::seg])
    return
        element {$node/name()} {
            $node/@*,
            if ($whitespace-follows and $listed-element-follows and not($node[self::index]))
            then attribute {"break-after"}{"yes"}
            else (),
            $recursive-function
        }
};
(:~
(:~
: pre:preprocessing
: main preprocessing function.
:
: @param $nodes the nodes to be converted
: @return item()* representing the converted node
:
:
: @version 2.0 (2018-02-01)
: @status working
: @author Uwe Sikora
:)
declare function pre:preprocessing
($nodes as node()*) as item()* {
for $node in $nodes
return
typeswitch($node)
case processing-instruction() return ()
case text() return (
whitespace:text($node, " ")
)
case comment() return ()
case element(TEI) return (
element{$node/name()}{
$node/@*,
......@@ -150,19 +160,19 @@ declare function pre:preprocessing
}
}
)
case element(teiHeader) return ( $node )
case element(div) return (
if ($node[@type = 'section-group']) then (
pre:preprocessing($node/node())
)
)
else (
pre:default-element( $node, pre:preprocessing($node/node()) )
)
)
case element(lem) return (
element{$node/name()}{
$node/@*,
......@@ -170,7 +180,7 @@ declare function pre:preprocessing
pre:preprocessing($node/node())
}
)
case element(rdg) return (
element{$node/name()}{
$node/@*,
......@@ -178,87 +188,84 @@ declare function pre:preprocessing
pre:preprocessing($node/node())
}
)
case element(note) return (
if ( $node[@type != "editorial"] or $node[ not(@type) ] ) then (
pre:default-element( $node, pre:preprocessing($node/node()) )
) else ( )
)
case element(pb) return (
let $preceding-sibling := $node/preceding-sibling::node()[1]
let $following-sibling := $node/following-sibling::node()[1]
let $first := $node = $node/parent::node()/node()[not(self::text() and normalize-space(self::node()) = '')][1]
let $ignore := ("docAuthor", "app", "index", "seg", "bibl")
return
element {$node/name()}{
$node/@*,
if (
if (
( $preceding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceding-sibling, " ") = false() )
and
( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = false() )
) then ( attribute {"break"}{"no"} )
(:else if (
( $preceeding-sibling[self::text() and not(normalize-space(.) = '')] and ends-with($preceeding-sibling, " ") = true() )
and
( $following-sibling[self::text() and not(normalize-space(.) = '')] and starts-with($following-sibling, " ") = true() )
) then ( attribute {"clear"}{"left"} ) :)
else if ( $following-sibling[self::docAuthor or self::app or self::index or self::seg or self::bibl] ) then (
attribute {"break"}{"yes"}
)
else if (
( $preceding-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
) then ( attribute {"break"}{"no"} )
else if (
( $preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
and
( $following-sibling[self::text()][matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
( $following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""] )
) then (
attribute {"break"}{"yes"}
attribute {"break-before"}{"yes"},
attribute {"break-after"}{"yes"}
)
else if (
$preceding-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-before"}{"yes"}
)
else if (
$following-sibling[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
) then (
attribute {"break-after"}{"yes"}
)
else ( )
}
)
case element(hi) return (
if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then(
if($node[@rend = 'right-aligned' or @rend = 'center-aligned']) then(
element {'aligned'} {
$node/@*,
pre:preprocessing($node/node())
}
}
)
else (
pre:default-element( $node, pre:preprocessing($node/node()) )
)
)
case element(seg) return (
if($node[@type = 'item']) then(
element {'item'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node())
}
}
)
else if($node[@type = 'head']) then(
element {'head'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node())
}
}
)
else if($node[@type = 'row']) then(
element {'row'} {
$node/@*[name() != 'type'],
pre:preprocessing($node/node())
}
}
)
else (
pre:default-element( $node, pre:preprocessing($node/node()) )
)
)
default return (
default return (
pre:default-element( $node, pre:preprocessing($node/node()) )
)
};
\ No newline at end of file
};
xquery version "3.0";
(:~
(:~
: WHITESPACE Module ("whitespace", "http://bdn.edition.de/intermediate_format/whitespace_handling")
: *******************************************************************************************
: This module contains the functions to handle different whitespace operations on text
......@@ -9,12 +9,14 @@ xquery version "3.0";
: @author Uwe Sikora
:)
module namespace whitespace="http://bdn.edition.de/intermediate_format/whitespace_handling";
import module namespace pre="http://bdn.edition.de/intermediate_format/preprocessing" at "preprocessing.xqm";
declare default element namespace "http://www.tei-c.org/ns/1.0";
(:############################# Modules Functions #############################:)
(:~
(:~
: whitespace:text()
: This function handles whitespace in defined text() nodes
:
......@@ -28,26 +30,26 @@ declare default element namespace "http://www.tei-c.org/ns/1.0";
:)
(: Handles whitespace for the given text node(s).
 :
 : Text is passed on to whitespace:escape-text() when it is not a
 : whitespace-only node, or when it is exactly one single space; any other
 : whitespace-only text yields the empty sequence.
 :
 : $text        : the text node(s) to process
 : $escape-char : when its effective boolean value is true (a non-empty
 :                string), the text is escaped with "@"; otherwise with " "
 :
 : NOTE(review): the value of $escape-char is only used as a flag; the
 : character handed to whitespace:escape-text() is the fixed "@". Confirm
 : that this is intended before passing the parameter through instead.
 :)
declare function whitespace:text
    ( $text as text()*, $escape-char as xs:string? ) as text()* {
    let $whitespace-node := $text[matches(., "[\s\n\r\t]") and normalize-space(.) = ""]
    let $single-whitespace-between-nodes := $text = ' '
    return
        if ( not($whitespace-node) or $single-whitespace-between-nodes ) then (
            if ($escape-char)
            then whitespace:escape-text($text, "@")
            else whitespace:escape-text($text, " ")
        )
        else ()
};
(:~
(:~
: whitespace:escape-text()
: This function replaces whitespaces in a text()
: This function replaces whitespaces in a text()
: with a defined preservation character
:
: @param $text the text-node to be converted
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment