Commit 87d4607d authored by mrodzis's avatar mrodzis 🌎 Committed by mrodzis
Browse files

Improve handshift handling

parent 71e53905
......@@ -32,62 +32,63 @@ declare variable $tidySimple:valid-hands :=
(:~
 : Runs the tidying pipeline on the tei:text descendants of $tei and stores
 : the result, together with the TEI header, as "fontane-tei.xml".
 :
 : The pipeline steps are applied in this order: sort-out-invalid-hands,
 : sort-out-surplus-elements, enhance-handshifts, whitespaces,
 : summarize-entries, split-headings. Each step receives the complete output
 : of the previous one.
 :
 : @param $tei the TEI input
 : @return whatever xmldb:store returns for the stored resource
 :)
declare function tidySimple:main($tei as node()*) {
    (: one binding only: the first step must not be evaluated twice :)
    let $tidy := tidySimple:sort-out-invalid-hands($tei//tei:text)
        => tidySimple:sort-out-surplus-elements()
        => tidySimple:enhance-handshifts()
        => tidySimple:whitespaces()
        => tidySimple:summarize-entries()
        => tidySimple:split-headings()
(:        => tidySimple:sort():)
(:    let $text-with-sections := tidySimple:make-structure($clear-surplus-hands):)
    (: NOTE(review): the header is read as a child of $tei while the id is
       read from a tei:TEI descendant of $tei; depending on whether $tei is a
       document node or the tei:TEI element, one of the two selects nothing.
       Confirm against the caller. :)
    let $header := $tei/tei:teiHeader
    return
        xmldb:store("/db/apps/SADE/resources/xml/", "fontane-tei.xml", <TEI xmlns="http://www.tei-c.org/ns/1.0" id="{$tei//tei:TEI/@id}">{$header}{$tidy}</TEI>)
};
declare function tidySimple:sort-out-invalid-hands($nodes as node()*)
declare function tidySimple:sort-out-invalid-hands($nodes as node()*)
as node()* {
for $node in $nodes return
typeswitch ($node)
case text() return
if($node/preceding::tei:milestone[@unit = "handshift"][1]
if($node/preceding::tei:milestone[@unit = "handshift"][1]
and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $node/preceding::tei:milestone[@unit = "handshift"][1]))) then
()
else
$node
(: all lines have to be preserved because of the editorial commentary
which references the lines in the notebooks. if we omitted @unit = "line"
referencing wouldn't work any longer :)
referencing wouldn't work any longer :)
case element(tei:milestone) return
let $prev-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
return
if($node/@unit = "handshift" and
simpleHelpers:is-hand-valid($tidySimple:valid-hands, $node)) then
tidySimple:clear-handshift($node)
tidySimple:copy-element($node, "post")
else if($node/@unit = "handshift") then
()
else if($prev-handshift
and $node/@unit = "line"
and $node/@unit = "line"
and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $prev-handshift))) then
tidySimple:copy-element($node, "post")
else if($prev-handshift
and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $prev-handshift))) then
()
else
tidySimple:copy-element($node, "post")
default return
let $prev-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
let $first-child-handshift := $node/child::tei:milestone[@unit = "handshift"][1]
let $first-child-element := $node/child::*[1]
let $first-child-node := $node/child::node()[1]
return
(: in some cases the valid handshift is the first child node
(: in some cases the valid handshift is the first child node
instead of a previous node. of course we want to keep the element
then :)
if($first-child-element = $first-child-handshift
......@@ -96,31 +97,34 @@ as node()* {
or $first-child-element = $first-child-node)
and simpleHelpers:is-hand-valid($tidySimple:valid-hands, $first-child-handshift)) then
tidySimple:copy-element($node, "post")
else if($prev-handshift
and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $prev-handshift))) then
()
else
tidySimple:copy-element($node, "post")
};
declare function tidySimple:sort-out-surplus-elements($nodes as node()*)
declare function tidySimple:sort-out-surplus-elements($nodes as node()*)
as node()* {
for $node in $nodes return
typeswitch ($node)
case text() return
$node
case element(tei:milestone) return
if($node/@unit = "handshift") then
if(simpleHelpers:is-prev-hand-same($node)
or not(tidySimple:has-hand-text($node))) then
()
else
tidySimple:clear-handshift($node)
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
else if($node/@unit = "line") then
let $next-pb := $node/following::tei:pb[1]
let $nodes-on-page := $node/preceding::tei:pb[1]/following::node()[. << $next-pb][functx:node-kind(.) = "text"]
......@@ -128,7 +132,7 @@ as node()* {
for $node-pb in $nodes-on-page return
if(normalize-space($node-pb) != "") then
true()
else
else
false()
return
if(functx:is-value-in-sequence(true(), $contains-text)) then
......@@ -138,13 +142,13 @@ as node()* {
}
else
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
}
default return
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
......@@ -158,14 +162,14 @@ as xs:boolean {
let $nodes-between := $node/following::node()[. << $next-handshift]
let $is-text-node :=
for $node-between in $nodes-between
return
return
if ($node-between[self::text()]
and not(normalize-space($node-between) = "")) then
true()
else
false()
return
if($next-handshift
return
if($next-handshift
and functx:is-value-in-sequence(true(), $is-text-node)) then
true()
else if(not($next-handshift)) then
......@@ -182,11 +186,13 @@ declare function tidySimple:copy-element($node as node(), $flag as xs:string)
if($flag = "structure") then
tidySimple:make-structure($node/node())
else if($flag = "post") then
tidySimple:sort-out-invalid-hands($node/node())
tidySimple:sort-out-invalid-hands($node/node())
else if($flag = "surplus") then
tidySimple:sort-out-invalid-hands($node/node())
tidySimple:sort-out-invalid-hands($node/node())
else if($flag = "summarize") then
tidySimple:summarize-entries($node/node())
else if($flag = "hs-enhance") then
tidySimple:enhance-handshifts($node/node())
else
tidySimple:tidy($node/node())
}
......@@ -194,7 +200,7 @@ declare function tidySimple:copy-element($node as node(), $flag as xs:string)
(:~
: Purges surplus attributes from tei:milestone[@unit = "handshift"].
:
:
: @author Michelle Weidling
: @param $node the current tei:milestone[@unit = "handshift"]
: @return the purged tei:milestone[@unit = "handshift"]
......@@ -203,8 +209,7 @@ declare function tidySimple:clear-handshift($node as element(tei:milestone))
as element(tei:milestone) {
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
attribute unit {"handshift"},
$node/@ws-before,
$node/@ws-after,
$node/(@* except (@subtype, @rend)),
if($node/@subtype = "") then
()
else
......@@ -217,8 +222,58 @@ as element(tei:milestone) {
};
(:~
 : Rewrites every tei:milestone[@unit = "handshift"] so that its script and
 : medium information is carried by @rend.
 :
 : @script and @medium are dropped from the milestone. Their values, resolved
 : by tidySimple:enhance-script and tidySimple:enhance-medium, are written to
 : @rend separated by a single space. Text nodes are returned unchanged. All
 : other nodes are handed to tidySimple:copy-element with the "hs-enhance"
 : flag, which applies this function to their child nodes.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare function tidySimple:enhance-handshifts($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
            case text() return
                $node

            case element(tei:milestone) return
                if ($node/@unit = "handshift") then
                    let $script := tidySimple:enhance-script($node, $node/@script/string())
                    let $medium := tidySimple:enhance-medium($node, $node/@medium/string())
                    (: A @rend that is already present must not be copied
                       verbatim: together with the constructed @rend below it
                       would give the element two attributes of the same name
                       (err:XQDY0025). Its value is prepended instead.
                       NOTE(review): confirm that prepending is the wanted
                       merge order. :)
                    let $existing-rend := $node/@rend[normalize-space(.) != ""]/string()
                    return
                        element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                            $node/(@* except (@script, @medium, @rend)),
                            attribute rend {
                                string-join(($existing-rend, $script || " " || $medium), " ")
                            }
                        }
                else
                    tidySimple:copy-element($node, "hs-enhance")

            default return
                tidySimple:copy-element($node, "hs-enhance")
};
(:~
 : Resolves the script value of a handshift milestone.
 :
 : If $script is missing, empty or the placeholder "script()", the value is
 : taken from the nearest preceding tei:milestone[@unit = "handshift"] whose
 : @script is neither empty nor the placeholder. If no such milestone exists,
 : "Latf" is returned. Otherwise $script is returned unchanged.
 :
 : @param $node the milestone whose preceding handshifts are searched
 : @param $script the milestone's own @script value, if any
 : @return the resolved script value
 :)
declare function tidySimple:enhance-script($node as node(), $script as xs:string?) as xs:string {
    if (not($script) or $script = "script()") then
        (: an empty @script on an earlier handshift is as uninformative as a
           missing one, so it is skipped just like the placeholder :)
        let $prev-relevant-hs :=
            $node/preceding::tei:milestone[@unit = "handshift"][@script[. != "" and . != "script()"]][1]
        return
            ($prev-relevant-hs/@script/string(), "Latf")[1]
    else
        $script
};
(:~
 : Resolves the medium value of a handshift milestone.
 :
 : If $medium is missing, empty or the placeholder "medium()", the value is
 : taken from the nearest preceding tei:milestone[@unit = "handshift"] whose
 : @medium is neither empty nor the placeholder. If no such milestone exists,
 : "pencil" is returned. Otherwise $medium is returned unchanged.
 :
 : @param $node the milestone whose preceding handshifts are searched
 : @param $medium the milestone's own @medium value, if any
 : @return the resolved medium value
 :)
declare function tidySimple:enhance-medium($node as node(), $medium as xs:string?) as xs:string {
    if (not($medium) or $medium = "medium()") then
        (: an empty @medium on an earlier handshift is as uninformative as a
           missing one, so it is skipped just like the placeholder :)
        let $prev-relevant-hs :=
            $node/preceding::tei:milestone[@unit = "handshift"][@medium[. != "" and . != "medium()"]][1]
        return
            ($prev-relevant-hs/@medium/string(), "pencil")[1]
    else
        $medium
};
(:~
:
: :)
declare function tidySimple:make-structure($nodes as node()*) as node()* {
for $node in $nodes return
......@@ -228,29 +283,29 @@ declare function tidySimple:make-structure($nodes as node()*) as node()* {
()
else
$node
case element(tei:milestone) return
case element(tei:milestone) return
if($node/@unit = "section") then
tidySimple:make-section($node)
else if($node/@unit = "paragraph") then
tidySimple:copy-element($node, "structure")
(: lines in sections have to be ignored since they are copied into
the section with get-section-chunk :)
(: lines in sections have to be ignored since they are copied into
the section with get-section-chunk :)
else if($node/@unit = "line"
and tidySimple:is-in-section($node)) then
()
else
tidySimple:copy-element($node, "structure")
case element(tei:anchor) return
if (matches($node/preceding::tei:milestone/@spanTo, $node/@xml:id)) then
()
else
tidySimple:copy-element($node, "structure")
default return
default return
if(tidySimple:is-in-section($node)) then
()
else
......@@ -260,7 +315,7 @@ declare function tidySimple:make-structure($nodes as node()*) as node()* {
(:~
: Checks if a given node is between a tei:milestone[@unit = "section"] and its
: corresponding tei:anchor.
:
:
: @author Michelle Weidling
: @param the current node
: @return xs:boolean
......@@ -270,7 +325,7 @@ declare function tidySimple:is-in-section($node as node()) as xs:boolean {
let $target-id := replace($prev-section-marker, "#", "")
let $next-anchor := $node/following::tei:anchor[matches(@xml:id, $target-id)]
return
if($prev-section-marker
if($prev-section-marker
and $next-anchor) then
true()
else
......@@ -279,18 +334,18 @@ declare function tidySimple:is-in-section($node as node()) as xs:boolean {
(:~
: Creates a tei:div from a given tei:milestone[@unit = "section"] that contains
: Creates a tei:div from a given tei:milestone[@unit = "section"] that contains
: all elements from a given tei:milestone to its corresponding tei:anchor.
:
: Since the nodes between the current and the target one may also contain
:
: Since the nodes between the current and the target one may also contain
: structural information (e.g. tei:milestone[@unit = "paragraph"]), these nodes
: cannot simply be copied but have to be transformed as well.
:
:
: @author Michelle Weidling
: @param $node the current tei:milestone[@unit = "section"]
: @return element(tei:div)
: :)
declare function tidySimple:make-section($node as element(tei:milestone))
declare function tidySimple:make-section($node as element(tei:milestone))
as element(tei:div) {
element tei:div {
attribute type {"section"},
......@@ -300,14 +355,14 @@ as element(tei:div) {
(:~
: Returns all nodes between a tei:milestone[@unit = "section"] and its
: Returns all nodes between a tei:milestone[@unit = "section"] and its
: corresponding tei:anchor.
:
: @author Michelle Weidling
: @param the current tei:milestone element
: @return all nodes between the current element and the respective tei:anchor
:)
declare function tidySimple:get-section-chunk($node as element(tei:milestone))
declare function tidySimple:get-section-chunk($node as element(tei:milestone))
as node()* {
let $target-id := substring-after($node/@spanTo, "#")
let $target := $node/following::tei:anchor[matches(@xml:id, $target-id)]
......@@ -325,7 +380,7 @@ declare function tidySimple:whitespaces($nodes as node()*) as node()* {
typeswitch($node)
case text() return
$node
default return
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
......@@ -353,7 +408,7 @@ declare function tidySimple:split-headings($nodes as node()*) as node()* {
typeswitch($node)
case text() return
$node
case element(tei:head) return
if($node//tei:lb[@type = "edited_text"]) then
let $lb := $node//tei:lb[@type = "edited_text"]
......@@ -374,13 +429,13 @@ declare function tidySimple:split-headings($nodes as node()*) as node()* {
$split-bottom
})
}
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:split-headings($node/node())
}
default return
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
......@@ -406,10 +461,10 @@ declare function tidySimple:split-headings($nodes as node()*) as node()* {
declare function tidySimple:summarize-entries($nodes as node()*) as node()* {
for $node in $nodes return
typeswitch ($node)
case text() return
$node
case element(tei:hi) return
if($node/@next and not($node/@prev)) then
let $this-content := $node/child::text()
......@@ -423,12 +478,12 @@ declare function tidySimple:summarize-entries($nodes as node()*) as node()* {
()
else
tidySimple:copy-element($node, "summarize")
default return
tidySimple:copy-element($node, "summarize")
};
declare function tidySimple:get-next-content($node as node()*, $content as node()*)
declare function tidySimple:get-next-content($node as node()*, $content as node()*)
as node()* {
if($node/@next) then
let $next-hi-id := substring-after($node/@next, "#")
......@@ -437,8 +492,8 @@ as node()* {
let $break := if($node/@break = "true") then text{" "} else ()
let $content := ($content, $break, $next-content)
return
return
tidySimple:get-next-content($next-node, $content)
else
$content
};
\ No newline at end of file
};
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment