Commit c8ce4bd8 authored by mrodzis 🌿
Browse files

Improve white spaces in HTML output of ET

parent e349ebd7
......@@ -17,7 +17,8 @@ import module namespace config="http://textgrid.de/ns/SADE/config" at "../../con
(:~
 : Entry point of the XHTML serialization.
 :
 : Builds an xhtml:div from the result of simple2xhtml:recursion over all
 : tei:text descendants of $nodes, passes that tree through
 : simple2xhtml:tidy, and stores the tidied tree in the collection
 : $config:data-root || "/print/xhtml/" under the name $uri || ".html".
 :
 : The untidied tree used to be stored under the same resource name right
 : before the tidied one; that first write was always overwritten by the
 : second and has been dropped.
 :
 : NOTE(review): the function still returns the untidied $xhtml, not $tidy.
 : Confirm with the callers whether that is intended.
 :
 : @param $nodes the nodes that are searched for tei:text
 : @param $uri the base name of the stored resource
 : @return the untidied xhtml:div
 :)
declare function simple2xhtml:main($nodes as node()*, $uri as xs:string) {
    let $xhtml := element xhtml:div {simple2xhtml:recursion($nodes//tei:text)}
    let $tidy := element xhtml:div {simple2xhtml:tidy($xhtml)}
    (: bound only for its side effect: persisting the tidied tree :)
    let $store := xmldb:store($config:data-root || "/print/xhtml/", $uri || ".html", $tidy)
    return
        $xhtml
};
......@@ -56,12 +57,15 @@ declare function simple2xhtml:recursion($nodes as node()*) as node()* {
else
element xhtml:span {
simple2xhtml:set-hs-info($node, ()),
replace($node, "@P", "")
=> replace(" ,", ",")
replace($node, " ,", ",")
=> replace(" \?", "?")
=> replace(" \.", ".")
=> replace(" ;", ";")
=> replace("@", " ")
=> replace("@@", " ")
=> replace("@P", "")
=> replace("  ", " ")
=> replace(":", ": ")
=> replace(" “", "“")
}
case element(tei:body) return
......@@ -658,3 +662,22 @@ declare function simple2xhtml:make-integration($node as element(tei:seg)) {
}
}
};
(:~
 : Normalizes white space in a node tree while keeping its markup.
 :
 : Text nodes:
 :   * a text node that consists of exactly one white space character and
 :     whose next text node (in document order) starts with ",", "." or "?"
 :     is removed;
 :   * in every other text node runs of white space are collapsed to a
 :     single blank, and a blank directly before ",", "." or ":" is removed.
 :
 : Element nodes are rebuilt with their original name and attributes and
 : tidied children. Previously only the children were processed, which
 : discarded every element and attribute and reduced the result to plain text.
 :
 : All other node kinds are handled as before: the function recurses into
 : their child nodes, which yields the empty sequence for nodes without
 : children.
 :
 : @param $nodes the nodes to tidy
 : @return the tidied nodes
 :)
declare function simple2xhtml:tidy($nodes as node()*) as node()* {
    for $node in $nodes return
        typeswitch ($node)
        case text() return
            if(string-length($node) = 1 and normalize-space($node) = ""
            and matches(substring($node/following::text()[1], 1, 1), "[,\.\?]")) then
                ()
            else
                text{ replace($node, "[\s]+", " ")
                    => replace(" ,", ",")
                    => replace(" \.", ".")
                    => replace(" :", ":") }

        case element() return
            (: keep the markup; only the text below it is changed :)
            element {node-name($node)} {
                $node/@*,
                simple2xhtml:tidy($node/node())
            }

        default return
            simple2xhtml:tidy($node/node())
};
......@@ -56,7 +56,8 @@ xs:string) as node()? {
</text>
</TEI>
</teiCorpus>
let $store := xmldb:store($config:data-root || "/print/xml/", $uri || "-tmp.xml", $tei)
let $store := xmldb:store($config:data-root || "/print/xml/", $uri || "-tmp.xml", $tei)
return $tei
};
......@@ -774,7 +775,10 @@ $log as xs:string) as node()* {
(
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
fontaneSimple:make-index-infos($node, $index-type),
if($node/@prev) then
()
else
fontaneSimple:make-index-infos($node, $index-type),
fontaneSimple:transform($node/node(), $uri, $log)
},
if(not($node//tei:abbr/text()[ends-with(., ":")]
......
......@@ -72,33 +72,38 @@ as xs:boolean {
: @return text() the formatted text
: :)
declare function simpleHelpers:prepare-text($node as text()) as text()? {
if(not(normalize-space($node) = "")) then
if(normalize-space($node) = "" and string-length($node) gt 1) then
()
else
(: the @P serves as a flag for the removal of hyphens. this is necessary
since we sometimes have cases where a hyphen is the only content of a
string. functx:substring-before-last would therefore produce an empty
string which leads to problems while preparing the text any further. :)
let $cleared-end-hyphen :=
if((ends-with($node, "-") or ends-with($node, "⸗"))
and $node/parent::tei:add) then
$node
else if(ends-with($node, "-") and not(simpleHelpers:keep-hyphen($node))) then
(: if((ends-with($node, "-") or ends-with($node, "⸗")):)
(: and $node/parent::tei:add) then:)
(: $node:)
(: else :)
if(ends-with($node, "-") and not(simpleHelpers:keep-hyphen($node))) then
text {functx:substring-before-last($node, "-") || "@P"}
else if(ends-with($node, "⸗") and not(simpleHelpers:keep-hyphen($node))) then
text {functx:substring-before-last($node, "⸗") || "@P"}
else
replace($node, "⸗", "-")
let $cleared-hyphen := replace($cleared-end-hyphen, "⸗", "-")
let $save-whitespaces := replace($cleared-end-hyphen, " ", "@@")
let $cleared-hyphen := replace($save-whitespaces, "⸗", "-")
let $cleared-round-s := replace($cleared-hyphen, "ſ", "s")
let $cleared-Tironian := replace($cleared-round-s, "&#x204a;c.", "etc.")
(: let $normalized := normalize-space($cleared-round-s):)
(: let $last-char := substring($cleared-round-s, string-length($cleared-round-s), 1):)
(: let $add-whitespace := :)
(: if(matches($last-char, "[\w\d,\.;?!]"):)
(: and $node/parent::tei:line/child::*[last()] = . ) then:)
(: $cleared-round-s || " ":)
(: else:)
(: $cleared-round-s:)
(: return text {$add-whitespace}:)
let $cleared-big-space :=
if($node/following-sibling::*[1][self::tei:handShift]
and ends-with($node, "-&#x2003;")) then
text{replace($cleared-round-s, "-&#x2003;", "@P")}
else if($node/following-sibling::*[1][self::tei:handShift]
and ends-with($node, "&#x2003;")) then
text{replace($cleared-round-s, "&#x2003;", "")}
else
$cleared-round-s
let $escaped-big-space := text{replace($cleared-big-space, "&#x2003;", "?@?")}
let $cleared-Tironian := replace($cleared-big-space, "&#x204a;c.", "etc.")
return
(: in cases where a given $node only consists of a hyphen we don't
return a text node because it's unnecessary and leads to problems
......@@ -109,8 +114,6 @@ declare function simpleHelpers:prepare-text($node as text()) as text()? {
text {$cleared-Tironian}
else
()
else
()
};
......@@ -367,7 +370,7 @@ $node as element(tei:milestone)) as xs:boolean {
:)
declare function simpleHelpers:is-prev-hand-same($node as element(tei:milestone))
as xs:boolean {
let $prev-hand := $node/preceding::tei:milestone[@unit = "handshift"][1]
let $prev-hand := $node/preceding::*[@unit = "handshift"][1]
return
(: since we can't take the order of the attributes for granted we can't
use functx:sequence-deep-equal :)
......
......@@ -36,6 +36,7 @@ declare function tidySimple:main($tei as node()*, $uri as xs:string) {
=> tidySimple:sort-out-invalid-hands()
=> tidySimple:split-headings()
=> tidySimple:summarize()
=> tidySimple:summarize-hi()
let $header :=
tidySimple:get-Fontanes-sources($tei//tei:teiHeader[parent::tei:TEI])
=> tidySimple:get-references-in-abstract()
......@@ -150,6 +151,70 @@ as node()* {
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:head) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:date) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:rs) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:note) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:abbr) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:list) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:item) return
if(not($node/* or $node/node())) then
()
else
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-elements($node/node())
}
case element(tei:div) return
if($node/@type = "label"
and not($node/* or $node/node())) then
......@@ -204,6 +269,8 @@ declare function tidySimple:copy-element($node as node(), $flag as xs:string)
tidySimple:get-Fontanes-sources($node/node())
else if($flag = "summarize") then
tidySimple:summarize($node/node())
else if($flag = "summarize-hi") then
tidySimple:summarize-hi($node/node())
else if($flag = "ref") then
tidySimple:get-references-in-abstract($node/node())
else
......@@ -404,6 +471,17 @@ declare function tidySimple:summarize($nodes as node()*) as node()* {
case comment() return
$node
case element(tei:milestone) return
if($node/@unit = "line") then
if($node/preceding::*[1][self::tei:milestone[@unit = "line"]]
and $node/preceding::node()[1][self::tei:milestone[@unit = "line"]
or normalize-space(.) = ""]) then
()
else
tidySimple:summarize-entries($node)
else
tidySimple:summarize-entries($node)
case element(tei:rs) return
tidySimple:summarize-entries($node)
......@@ -413,8 +491,8 @@ declare function tidySimple:summarize($nodes as node()*) as node()* {
case element(tei:item) return
tidySimple:summarize-entries($node)
case element(tei:hi) return
tidySimple:summarize-entries($node)
(: case element(tei:hi) return:)
(: tidySimple:summarize-entries($node):)
case element(tei:seg) return
tidySimple:summarize-entries($node)
......@@ -520,3 +598,65 @@ declare function tidySimple:get-references-in-abstract($nodes as node()*) as nod
default return
tidySimple:copy-element($node, "ref")
};
(:~
 : Walks over the given nodes and treats tei:hi elements separately.
 :
 : Text and comment nodes are returned unchanged, tei:hi elements are handed
 : to tidySimple:summarize-entries, and every other node is passed to
 : tidySimple:copy-element with the flag "summarize-hi".
 :
 : NOTE(review): this module also defines tidySimple:summarize-hi-entries,
 : but tei:hi is dispatched to tidySimple:summarize-entries here. Confirm
 : which of the two is intended.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare function tidySimple:summarize-hi($nodes as node()*) as node()* {
    for $current in $nodes
    return
        if ($current instance of text() or $current instance of comment()) then
            $current
        else if ($current instance of element(tei:hi)) then
            tidySimple:summarize-entries($current)
        else
            tidySimple:copy-element($current, "summarize-hi")
};
(:~
 : Handles a single node with respect to a virtual aggregation that is
 : expressed by the attributes @next and @prev.
 :
 :   * A node with @prev is dropped (empty sequence).
 :   * A node with @next but without @prev opens the aggregation: a tei:hi
 :     element is created that carries all attributes except @next and the
 :     result of tidySimple:apply-all-hi-nexts as content.
 :   * A node with neither attribute is passed to tidySimple:copy-element
 :     with the flag "summarize-hi".
 :
 : @param $node the node to inspect
 : @return the summarized element, the copied node, or the empty sequence
 :)
declare function tidySimple:summarize-hi-entries($node as node()) as node()* {
    let $has-prev := exists($node/@prev)
    let $has-next := exists($node/@next)
    return
        if ($has-prev) then
            ()
        else if ($has-next) then
            (: first element of a virtual aggregation :)
            element tei:hi {
                $node/(@* except @next),
                tidySimple:apply-all-hi-nexts($node)
            }
        else
            (: not part of a virtual aggregation :)
            tidySimple:copy-element($node, "summarize-hi")
};
(:~
 : Collects the content of a virtual aggregation by following the chain of
 : @next references that starts at $node.
 :
 : For a node without @next (the exit point of the chain) only its own
 : children, processed by tidySimple:summarize-hi, are returned.
 :
 : For a node with @next the corresponding node is looked up with
 : tidySimple:find-corresp-node($node, "next"). If exactly one node is
 : found, the result is, in this order:
 :   1. the processed children of $node,
 :   2. the nodes located between $node and the found node,
 :   3. the result of this function applied to the found node.
 : If no node or more than one node is found, the empty sequence is returned.
 :
 : Changes compared to the previous version:
 :   * the branches for the entry point and for a middle element were
 :     identical and have been merged;
 :   * the nodes in between are computed only after the cardinality check,
 :     because "<<" requires a single node on each side and would raise an
 :     error for several found nodes before the check could return ();
 :   * the following axis also yields the descendants of the nodes in
 :     between, so nested content used to be returned twice (once inside its
 :     ancestor, once on its own). Only the outermost nodes are kept now.
 :
 : @param $node a node that is part of a virtual aggregation
 : @return the aggregated content
 :)
declare function tidySimple:apply-all-hi-nexts($node as node()) as node()* {
    (: last of a virtual aggregation: exit point :)
    if(not($node/@next)) then
        tidySimple:summarize-hi($node/node())

    (: entry point or element in the middle of a virtual aggregation :)
    else
        let $next-node := tidySimple:find-corresp-node($node, "next")
        return
            if(count($next-node) = 1) then
                let $following := $node/following::node()[. << $next-node]
                (: drop nodes that are already contained in another selected node :)
                let $nodes-inbetween := $following[empty(ancestor::* intersect $following)]
                return
                    (tidySimple:summarize-hi($node/node()),
                    $nodes-inbetween,
                    tidySimple:apply-all-hi-nexts($next-node))
            else
                ()
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment