Commit 646fbae8 authored by MRodz's avatar MRodz Committed by mrodzis
Browse files

Restructure module and return paragraphs as milestones

Since restructuring the XML in such a way that tei:milestone[@unit = "paragraph"]
is expanded to tei:p is costly in terms of performance while not adding much
value, we decided to drop this functionality and leave the milestones as they
are.
parent 8606c8c2
......@@ -59,7 +59,7 @@ declare
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line""/><rs xmlns=""http://www.tei-c.org/ns/1.0"" type=""direct"" ref=""psn:Amalberga""><hi xmlns=""http://www.tei-c.org/ns/1.0"" xml:id=""C07_7r_d"" prev=""#C07_7r_c"">Amalberga</hi></rs></body>")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line"" rendition=""indent""/>Dies dauerte <date xmlns=""http://www.tei-c.org/ns/1.0"" from=""1125"" to=""1425"" type=""asynchronous"">300 Jahr</date>.<milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><p xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line"" rendition=""indent""/>Dies dauerte <date xmlns=""http://www.tei-c.org/ns/1.0"" from=""1125"" to=""1425"" type=""asynchronous"">300 Jahr</date>.</p></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line"" rendition=""indent""/>Dies dauerte <date xmlns=""http://www.tei-c.org/ns/1.0"" from=""1125"" to=""1425"" type=""asynchronous"">300 Jahr</date>.<milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/></body>")
function tidysimple-test:sections($node as element(*)) {
tidySimple:make-structure($node)
......
......@@ -110,62 +110,6 @@ as node()* {
};
(:~
 : Placeholder: ignores its argument and always returns the empty sequence.
 :
 : @param $nodes the nodes to tidy (currently unused)
 : @return the empty sequence
 :)
declare function tidySimple:tidy($nodes as node()*) as node()* {
()
};
(:~
 : Processes the given nodes one by one and returns the restructured result.
 :
 : Section milestones are passed to tidySimple:make-section, the first
 : milestone of each paragraph pair is passed to tidySimple:make-paragraph,
 : and nodes that are detected as lying inside such a span are dropped at this
 : level. Everything else is either returned as is (text) or handed to
 : tidySimple:copy-element with the "structure" flag.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare function tidySimple:make-structure($nodes as node()*) as node()* {
for $node in $nodes return
typeswitch ($node)
(: text is dropped when it is detected as lying inside a section or a
   paragraph span; otherwise it is returned unchanged :)
case text() return
let $prev-section-marker := $node/preceding::tei:milestone[1][@spanTo]
(: NOTE(review): replace() receives the milestone element itself, not its
   @spanTo value; presumably @spanTo was intended. Confirm. :)
let $target-id := replace($prev-section-marker, "#", "")
let $next-anchor := $node/following::tei:anchor[matches(@xml:id, $target-id)]
let $next-p := $node/following::tei:milestone[@unit = "paragraph"][1]
return
if($prev-section-marker and $next-anchor
or $next-p and tidySimple:is-second-part-of-pair($next-p)) then
()
else
$node
case element(tei:milestone) return
if($node/@unit = "section") then
tidySimple:make-section($node)
(: only the first milestone of a paragraph pair opens a paragraph :)
else if($node/@unit = "paragraph"
and not(tidySimple:is-second-part-of-pair($node))) then
tidySimple:make-paragraph($node)
(: NOTE(review): `and` binds tighter than `or`, so the alternative after `or`
   applies to every milestone reaching this branch, not only to line
   milestones. Its two operands are identical; presumably one of them was
   meant to test following::. Confirm. :)
else if($node/@unit = "line"
and $node/preceding::tei:milestone/@spanTo
and $node/following::tei:anchor[matches(preceding::tei:milestone/@spanTo, @xml:id)]
or $node/preceding::tei:milestone[@unit = "paragraph"]
and $node/preceding::tei:milestone[@unit = "paragraph"]) then
()
else
tidySimple:copy-element($node, "structure")
(: an anchor whose @xml:id is found in the @spanTo of a preceding milestone
   is dropped :)
case element(tei:anchor) return
if (matches($node/preceding::tei:milestone/@spanTo, $node/@xml:id)) then
()
else
tidySimple:copy-element($node, "structure")
(: all remaining node kinds: same span test as for text, but surviving nodes
   are passed to tidySimple:copy-element :)
default return
let $prev-section-marker := $node/preceding::tei:milestone[1][@spanTo]
let $target-id := replace($prev-section-marker, "#", "")
let $next-anchor := $node/following::tei:anchor[matches(@xml:id, $target-id)]
let $next-p := $node/following::tei:milestone[@unit = "paragraph"][1]
return
if($prev-section-marker and $next-anchor
or $next-p and tidySimple:is-second-part-of-pair($next-p)) then
()
else
tidySimple:copy-element($node, "structure")
};
declare function tidySimple:copy-element($node as node(), $flag as xs:string)
{
element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
......@@ -181,7 +125,13 @@ declare function tidySimple:copy-element($node as node(), $flag as xs:string)
}
};
(:~
: Purges surplus attributes from tei:handShift.
:
: @author Michelle Rodzis
: @param $node the current tei:handShift
: @return the purged tei:handShift
: :)
declare function tidySimple:clear-handshift($node as element(tei:handShift))
as element(tei:handShift) {
element tei:handShift {
......@@ -201,7 +151,79 @@ as element(tei:handShift) {
};
(:~
 : Processes the given nodes one by one and returns the restructured result.
 :
 : Section milestones are passed to tidySimple:make-section. Nodes for which
 : tidySimple:is-in-section holds are dropped at this level. Paragraph
 : milestones are kept as milestones. A tei:anchor is dropped when a preceding
 : tei:milestone points to it via @spanTo.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare function tidySimple:make-structure($nodes as node()*) as node()* {
    for $node in $nodes return
        typeswitch ($node)
        case text() return
            if (tidySimple:is-in-section($node)) then
                ()
            else
                $node

        case element(tei:milestone) return
            if ($node/@unit = "section") then
                tidySimple:make-section($node)
            (: paragraph milestones are deliberately left as they are :)
            else if ($node/@unit = "paragraph") then
                tidySimple:copy-element($node, "structure")
            (: lines in sections have to be ignored since they are copied into
               the section with get-section-chunk :)
            else if ($node/@unit = "line"
                     and tidySimple:is-in-section($node)) then
                ()
            else
                tidySimple:copy-element($node, "structure")

        case element(tei:anchor) return
            (: Exact comparison of "#" + @xml:id against every preceding
               @spanTo. A general comparison accepts any number of preceding
               milestones, whereas matches() raises a type error for more than
               one and would also treat the id as a regular expression. :)
            if ($node/preceding::tei:milestone/@spanTo = concat("#", $node/@xml:id)) then
                ()
            else
                tidySimple:copy-element($node, "structure")

        default return
            if (tidySimple:is-in-section($node)) then
                ()
            else
                tidySimple:copy-element($node, "structure")
};
(:~
 : Checks if a given node is between a spanning tei:milestone (one carrying
 : @spanTo) and its corresponding tei:anchor.
 :
 : Only the nearest preceding tei:milestone is inspected. The node counts as
 : being in a section when that milestone has a @spanTo and a tei:anchor whose
 : @xml:id equals the @spanTo value without its leading "#" still follows the
 : node.
 :
 : @author Michelle Rodzis
 : @param $node the current node
 : @return true() if the node lies inside such a span, false() otherwise
 :)
declare function tidySimple:is-in-section($node as node()) as xs:boolean {
    let $prev-section-marker := $node/preceding::tei:milestone[1][@spanTo]
    (: Read the pointer from @spanTo. The milestone element itself is empty, so
       atomizing it would always give "" and make every anchor match. :)
    let $target-id := replace($prev-section-marker/@spanTo, "^#", "")
    let $next-anchor := $node/following::tei:anchor[@xml:id = $target-id]
    return
        exists($prev-section-marker) and exists($next-anchor)
};
(:~
: Creates a tei:div from a given tei:milestone[@unit = "section"] that contains
: all elements from a given tei:milestone to its corresponding tei:anchor.
:
: Since the nodes between the current and the target one may also contain
: structural information (e.g. tei:milestone[@unit = "paragraph"]), these nodes
: cannot simply be copied but have to be transformed as well.
:
: @author Michelle Rodzis
: @param $node the current tei:milestone[@unit = "section"]
: @return element(tei:div)
: :)
declare function tidySimple:make-section($node as element(tei:milestone))
as element(tei:div) {
element tei:div {
......@@ -228,47 +250,6 @@ as node()* {
};
(:~
 : Wraps the content belonging to a paragraph milestone in a tei:p element.
 :
 : @param $node the tei:milestone that opens the paragraph
 : @return a tei:p holding whatever tidySimple:get-paragraph-chunk yields for
 :         $node
 :)
declare function tidySimple:make-paragraph($node as element(tei:milestone))
as element(tei:p) {
    let $content := tidySimple:get-paragraph-chunk($node)
    return
        element tei:p { $content }
};
(:~
 : Collects the content of one paragraph: every following sibling of the given
 : tei:milestone that comes before the next tei:milestone[@unit = "paragraph"]
 : in document order.
 :
 : @author Michelle Rodzis
 : @param $node the tei:milestone that opens the paragraph
 : @return the following siblings of $node located before the next paragraph
 :         milestone; the empty sequence if no such milestone follows
 :)
declare function tidySimple:get-paragraph-chunk($node as element(tei:milestone))
as node()* {
    let $boundary := $node/following::tei:milestone[@unit = "paragraph"][1]
    for $sibling in $node/following-sibling::node()
    where $sibling << $boundary
    return $sibling
};
(:~
 : Checks if a given tei:milestone[@unit = "paragraph"] is the second part of a
 : pair of tei:milestone[@unit = "paragraph"]. While tei:milestone[@unit = "section"]
 : spans to a tei:anchor, each odd tei:milestone[@unit = "paragraph"] spans to
 : the next even tei:milestone[@unit = "paragraph"], which causes the latter to
 : be omitted.
 :
 : The position is determined by counting the paragraph milestones on the
 : preceding axis of the given node and adding one for the node itself.
 :
 : @author Michelle Rodzis
 : @param $node the current tei:milestone element
 : @return true() if the tei:milestone has an even position, false() otherwise
 :)
declare function tidySimple:is-second-part-of-pair($node as element(tei:milestone))
as xs:boolean {
    let $position := count($node/preceding::tei:milestone[@unit = "paragraph"]) + 1
    return
        $position mod 2 = 0
};
(:~
 : Placeholder: ignores its argument and always returns the empty sequence.
 :
 : @param $nodes the nodes to tidy (currently unused)
 : @return the empty sequence
 :)
declare function tidySimple:tidy($nodes as node()*) as node()* {
()
};
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment