Dear Gitlab users, due to maintenance reasons, Gitlab will not be available on Thursday 30.09.2021 from 5:00 pm to approximately 5:30 pm.

Commit 9a5b057a authored by mrodzis's avatar mrodzis 🌎
Browse files

chore(tidysimple.xqm, ettransfo.xqm): add documentation, rename module, minor refactoring

parent b2e44299
......@@ -26,15 +26,15 @@ import module namespace fsort="http://fontane-nb.dariah.eu/sort" at "sort.xqm";
import module namespace functx = "http://www.functx.com";
import module namespace prepCom="http://fontane-nb.dariah.eu/prepCom" at "prepcom.xqm";
import module namespace simple2xhtml="http://fontane-nb.dariah.eu/simple2xhtml" at "simple2xhtml.xqm";
import module namespace tidySimple ="http://fontane-nb.dariah.eu/tidysimple" at "tidysimple.xqm";
import module namespace tidy ="http://fontane-nb.dariah.eu/tidy" at "tidy.xqm";
declare variable $etTransfo:cases :=
(
"3qtcz.xml", (: case C :)
"3qtqv.xml", (: case A :)
"3qtqw.xml", (: case B :)
"3qtqx.xml", (: case D :)
"3qtqz.xml" (: case E :)
(: "3qtcz.xml", (: case C :):)
(: "3qtqv.xml", (: case A :):)
(: "3qtqw.xml", (: case B :):)
"3qtqx.xml" (: case D :)
(: "3qtqz.xml" (: case E :):)
);
declare variable $etTransfo:coll := "/db/apps/SADE/modules/fontane/edited-text/";
......@@ -231,7 +231,7 @@ declare function etTransfo:transform-tei($tei as element(tei:TEI), $log as xs:st
let $tidy-interform :=
try {
tidySimple:main($transform-to-interform, $tei/@id)
tidy:main($transform-to-interform, $tei/@id)
} catch * {
etTransfo:add-log-entry($log, "ETTRANSFO12: Error while tidying up the intermediate format for this notebook. Reason: 
" ||
concat("[", $err:line-number, ": ", $err:column-number, "] Error ", $err:code, ": ", $err:description))
......
xquery version "3.1";
(:~
: This module handles the conversion of the Fontane-TEI/XML into a TEI subset
: for the edited text. The resulting TEI is the basis for the "Edierter
: Text" (edited text) view on the website and the book. It represents the latest
: layer of text.
:
: Its main purpose is to tidy up the intermediate TEI that has been created by
: tei2teisimple.
:
: @author Michelle Weidling
: @version 0.1
: @since 0.0.0
:)
module namespace tidy ="http://fontane-nb.dariah.eu/tidy";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
import module namespace functx="http://www.functx.com";
import module namespace index-info="http://fontane-nb.dariah.eu/index-info" at "index-info.xqm";
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
(: Only contemporary hands (and selected posthumous hands) are considered for
 : the edited text. This variable holds the xml:id strings of every
 : tei:handNote with @script = "contemporary" found in the resources of the
 : collection <data-root>/data, resource by resource. :)
declare variable $tidy:valid-hands :=
    collection($config:data-root || "/data")
        ! .//tei:handNote[@script = "contemporary"]/@xml:id/string();
(:~
 : Entry point of the module. Runs the intermediate TEI through the clean-up
 : pipeline, prepares the header, wraps both in a new tei:TEI element and
 : stores the result as <data-root>/print/xml/<uri>.xml.
 :
 : @param $tei the intermediate TEI of one notebook
 : @param $uri the name (without extension) under which the result is stored
 : @return the newly created tei:TEI element
 :)
declare function tidy:main($tei as node()*, $uri as xs:string) {
    (: every step consumes the output of the step before it :)
    let $cleaned :=
        $tei
        => tidy:enhance-handshifts()
        => tidy:sort-out-surplus-elements()
        => tidy:sort-out-invalid-hands()
        => tidy:split-headings()
        => tidy:summarize()
        => tidy:summarize-headings()
        => tidy:summarize-notes()
        => tidy:summarize-hi()
        => tidy:sort-double-imgs()
        => tidy:tidy()
    let $header :=
        $tei//tei:teiHeader[parent::tei:TEI]
        => tidy:get-Fontanes-sources()
        => tidy:get-references-in-abstract()
    (: tei:TEI/@id is always something like 'Notizbuch A1'. For sorting we use
       the shelf number, i.e. the second token: its first character becomes
       key1, the remainder key2. :)
    let $notebook-id := $tei//tei:TEI/@id
    let $shelf-number := tokenize($notebook-id, " ")[2]
    let $key1 := substring($shelf-number, 1, 1)
    let $key2 := substring($shelf-number, 2)
    let $final-tei :=
        <TEI xmlns="http://www.tei-c.org/ns/1.0"
            id="{$notebook-id}"
            key1="{$key1}"
            key2="{$key2}">{$header}{$cleaned//tei:text}</TEI>
    (: bound only for its side effect of writing the document to the database :)
    let $stored := xmldb:store($config:data-root || "/print/xml/", $uri || ".xml", $final-tei)
    return
        $final-tei
};
(:~
 : Returns the text that has been written by contemporary (or certain posthumous)
 : hands. Up until this point, all encoded hands and their texts are still in
 : place.
 :
 : For every node the closest preceding tei:milestone[@unit = "handshift"]
 : decides whether the node is kept. Nodes without any preceding handshift are
 : always kept.
 :
 : @param $nodes the nodes to be filtered
 : @return the nodes (rebuilt via tidy:construct-element) that belong to a valid hand
 :)
declare function tidy:sort-out-invalid-hands($nodes as node()*)
as node()* {
    for $node in $nodes
    let $prev-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
    (: true only if there IS a preceding handshift and its hand is not valid.
       The helper is deliberately not called when no handshift precedes the
       node: the result would be irrelevant and the helper would receive an
       empty sequence. :)
    let $written-by-invalid-hand :=
        if ($prev-handshift) then
            not(simpleHelpers:is-hand-valid($tidy:valid-hands, $prev-handshift))
        else
            false()
    return
        typeswitch ($node)
        case text() return
            if ($written-by-invalid-hand) then
                ()
            else
                $node

        (: all lines have to be preserved because of the editorial commentary
           which references the lines in the notebooks. if we omitted
           @unit = "line" referencing wouldn't work any longer :)
        case element(tei:milestone) return
            if ($node/@unit = "handshift"
            and simpleHelpers:is-hand-valid($tidy:valid-hands, $node)) then
                tidy:construct-element($node, "post")

            (: a handshift to an invalid hand is dropped :)
            else if ($node/@unit = "handshift") then
                ()

            (: line markers survive even within text of an invalid hand :)
            else if ($written-by-invalid-hand
            and $node/@unit = "line") then
                tidy:construct-element($node, "post")

            (: every other milestone of an invalid hand is dropped :)
            else if ($written-by-invalid-hand) then
                ()

            else
                tidy:construct-element($node, "post")

        case element(tei:div) return
            (: even though it's posthumous we want to keep the text written on
               calendar pages by Friedrich Fontane. Unfortunately, Friedrich's
               handshift is oftentimes not the first hand appearing on the page
               but we want to keep the page nevertheless. :)
            if ($node/@type = "Kalenderblatt"
            or $node/@type = "clipping") then
                tidy:construct-element($node, "post")
            else
                tidy:invalid-hands-default-return($node)

        default return
            tidy:invalid-hands-default-return($node)
};
(:~
 : Default decision of tidy:sort-out-invalid-hands for nodes that have no
 : special treatment.
 :
 : The node is kept (rebuilt via tidy:construct-element) if
 :   * its first child element is a handshift to a valid hand and no text
 :     precedes that handshift within the node, or
 :   * there is no preceding handshift, or the preceding handshift belongs to
 :     a valid hand.
 : Otherwise it is dropped.
 :
 : The "is the first child element the handshift?" checks compare node
 : identity (via intersect). A general comparison with "=" would compare the
 : atomized string values instead, so that ANY empty first child element would
 : be considered equal to an (always empty) handshift milestone.
 :
 : @param $node the node to decide on
 : @return the rebuilt node or the empty sequence
 :)
declare function tidy:invalid-hands-default-return($node as node()*)
as node()* {
    let $prev-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
    let $first-child-handshift := $node/child::tei:milestone[@unit = "handshift"][1]
    let $first-child-element := $node/child::*[1]
    let $first-child-node := $node/child::node()[1]
    return
        (: in some cases the valid handshift is the first child node
           instead of a previous node. of course we want to keep the element
           then :)
        if (exists($first-child-element intersect $first-child-handshift)
        (: ensure there's no text before the handshift :)
        and (normalize-space($first-child-node) = ""
            or exists($first-child-element intersect $first-child-node))
        and simpleHelpers:is-hand-valid($tidy:valid-hands, $first-child-handshift)) then
            tidy:construct-element($node, "post")

        else if ($prev-handshift
        and not(simpleHelpers:is-hand-valid($tidy:valid-hands, $prev-handshift))) then
            ()

        else
            tidy:construct-element($node, "post")
};
(:~
 : Removes elements that aren't considered in the edited text:
 :
 : * subsequent handshifts of the same type
 : * certain line markers (within or directly after a
 :   tei:seg[@type = "missing-hyphen"])
 : * empty elements that have lost their text nodes during the sorting process
 :
 : Everything else is rebuilt with tidy:construct-element, which recurses into
 : the children.
 :
 : @param $nodes the nodes to be processed
 : @return the remaining nodes
 :)
declare function tidy:sort-out-surplus-elements($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case text() return
            $node

        case element(tei:milestone) return
            let $is-surplus :=
                if ($node/@unit = "handshift") then
                    (: a handshift repeating the previous hand carries no information :)
                    simpleHelpers:is-prev-hand-same($node)
                else
                    $node/@unit = "line"
                    and ($node/ancestor::tei:seg[@type = "missing-hyphen"]
                        or $node/preceding-sibling::*[1][self::tei:seg[@type = "missing-hyphen"]])
            return
                if ($is-surplus) then
                    ()
                else
                    tidy:construct-element($node, "surplus")

        (: the following elements are only kept if they still have content :)
        case element(tei:head) return
            tidy:surplus-elements-default-return($node)

        case element(tei:date) return
            tidy:surplus-elements-default-return($node)

        case element(tei:rs) return
            tidy:surplus-elements-default-return($node)

        case element(tei:note) return
            tidy:surplus-elements-default-return($node)

        case element(tei:abbr) return
            if (empty($node/node())) then
                ()
            else
                tidy:construct-element($node, "surplus")

        case element(tei:list) return
            tidy:surplus-elements-default-return($node)

        case element(tei:item) return
            tidy:surplus-elements-default-return($node)

        (: only label divisions are dropped when empty :)
        case element(tei:div) return
            if ($node/@type = "label"
            and empty($node/node())) then
                ()
            else
                tidy:construct-element($node, "surplus")

        default return
            tidy:construct-element($node, "surplus")
};
(:~
 : Since we have a bunch of elements that potentially lost their content in
 : the sorting process, the default return checks for any content.
 :
 : The return type is optional because an element without any child node is
 : dropped, i.e. the empty sequence is returned for it.
 :
 : @param $node the element to check
 : @return the rebuilt element, or the empty sequence if $node has no child nodes
 :)
declare function tidy:surplus-elements-default-return($node as node())
as element()? {
    if (empty($node/node())) then
        ()
    else
        tidy:construct-element($node, "surplus")
};
(:~
 : Checks if any text has been written in the hand introduced by a milestone.
 :
 : If no further handshift follows, the result is always true. Otherwise
 : the result is true if at least one text node that is not whitespace-only
 : lies between $node and the next handshift.
 :
 : @param $node the current tei:milestone
 : @return true() if the hand has text (or is the last hand), false() otherwise
 :)
declare function tidy:has-hand-text($node as element(tei:milestone))
as xs:boolean {
    let $next-handshift := $node/following::tei:milestone[@unit = "handshift"][1]
    return
        if (empty($next-handshift)) then
            true()
        else
            exists(
                $node/following::text()
                    [. << $next-handshift]
                    [normalize-space(.) != ""]
            )
};
(:~
 : A constructor. Creates an element in the TEI namespace with the same name
 : and attributes as $node and fills it by handing the children of $node to
 : the function selected by $flag. This way each recursive function of this
 : module can use the same constructor to descend into the tree.
 :
 : An unknown flag doesn't raise an error; instead the new element contains
 : a German marker text stating that copying the element failed.
 :
 : @param $node The current node
 : @param $flag Indicates the function to be called from within the constructor
 :)
declare function tidy:construct-element($node as node(), $flag as xs:string)
{
    element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
        $node/@*,
        let $children := $node/node()
        return
            switch ($flag)
            case "post" return
                tidy:sort-out-invalid-hands($children)
            case "surplus" return
                tidy:sort-out-surplus-elements($children)
            case "hs-enhance" return
                tidy:enhance-handshifts($children)
            case "sources" return
                tidy:get-Fontanes-sources($children)
            case "summarize" return
                tidy:summarize($children)
            case "summarize-headings" return
                tidy:summarize-headings($children)
            case "summarize-notes" return
                tidy:summarize-notes($children)
            case "summarize-hi" return
                tidy:summarize-hi($children)
            case "ref" return
                tidy:get-references-in-abstract($children)
            case "double-imgs" return
                tidy:sort-double-imgs($children)
            case "tidy" return
                tidy:tidy($children)
            default return
                text {"!!!Kopieren des Elements fehlgeschlagen!!!"}
    }
};
(:~
 : Purges surplus attributes from tei:milestone[@unit = "handshift"]: empty
 : @subtype and @rend attributes are removed, all other attributes are kept.
 :
 : @unit is always written explicitly as "handshift". It is therefore
 : excluded from the attributes copied from $node, because a second attribute
 : of the same name would raise err:XQDY0025 in the element constructor.
 :
 : @author Michelle Weidling
 : @param $node the current tei:milestone[@unit = "handshift"]
 : @return the purged tei:milestone[@unit = "handshift"]
 :)
declare function tidy:clear-handshift($node as element(tei:milestone))
as element(tei:milestone) {
    element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
        attribute unit {"handshift"},
        $node/(@* except (@unit, @subtype, @rend)),
        (: only non-empty @subtype and @rend survive :)
        $node/@subtype[. != ""],
        $node/@rend[. != ""]
    }
};
xquery version "3.1";
(:~
: This module handles the conversion of the Fontane-TEI/XML into TEI simplePrint
: for the edited text. The resulting TEI simplePrint is the basis for the "Edierter
: Text" (edited text) view on the website and the book. It represents the latest
: layer of text.
:
: @author Michelle Weidling
: @version 0.1
: @since TODO
:)
module namespace tidySimple ="http://fontane-nb.dariah.eu/tidysimple";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
import module namespace functx="http://www.functx.com";
import module namespace index-info="http://fontane-nb.dariah.eu/index-info" at "index-info.xqm";
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
(: The xml:id strings of every tei:handNote with @script = "contemporary"
 : found in the resources of the collection <data-root>/data. :)
declare variable $tidySimple:valid-hands :=
    collection($config:data-root || "/data")
        ! .//tei:handNote[@script = "contemporary"]/@xml:id/string();
(:~
 : Entry point of the module. Runs the intermediate TEI through the clean-up
 : pipeline, prepares the header, wraps both in a new tei:TEI element and
 : stores the result as <data-root>/print/xml/<uri>.xml.
 :
 : @param $tei the intermediate TEI of one notebook
 : @param $uri the name (without extension) under which the result is stored
 : @return the newly created tei:TEI element
 :)
declare function tidySimple:main($tei as node()*, $uri as xs:string) {
    (: disabled alternative: start the pipeline from $tei//tei:text only :)
    let $cleaned :=
        $tei
        => tidySimple:enhance-handshifts()
        => tidySimple:sort-out-surplus-elements()
        => tidySimple:sort-out-invalid-hands()
        => tidySimple:split-headings()
        => tidySimple:summarize()
        => tidySimple:summarize-headings()
        => tidySimple:summarize-notes()
        => tidySimple:summarize-hi()
        => tidySimple:sort-double-imgs()
        => tidySimple:tidy()
    let $header :=
        $tei//tei:teiHeader[parent::tei:TEI]
        => tidySimple:get-Fontanes-sources()
        => tidySimple:get-references-in-abstract()
    (: the second whitespace-separated token of tei:TEI/@id yields the sort
       keys: its first character is key1, the remainder key2 :)
    let $notebook-id := $tei//tei:TEI/@id
    let $second-token := tokenize($notebook-id, " ")[2]
    let $key1 := substring($second-token, 1, 1)
    let $key2 := substring($second-token, 2)
    let $final-tei :=
        <TEI xmlns="http://www.tei-c.org/ns/1.0"
            id="{$notebook-id}"
            key1="{$key1}"
            key2="{$key2}">{$header}{$cleaned//tei:text}</TEI>
    (: bound only for its side effect of writing the document to the database :)
    let $stored := xmldb:store($config:data-root || "/print/xml/", $uri || ".xml", $final-tei)
    return
        $final-tei
};
(:~
 : Filters out everything that has not been written by a valid hand. The
 : closest preceding tei:milestone[@unit = "handshift"] decides whether a node
 : is kept; nodes without a preceding handshift are kept.
 :
 : @param $nodes the nodes to be filtered
 : @return the nodes (rebuilt via tidySimple:copy-element) that belong to a valid hand
 :)
declare function tidySimple:sort-out-invalid-hands($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case text() return
            let $governing-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
            return
                if ($governing-handshift
                and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $governing-handshift))) then
                    ()
                else
                    $node

        (: all lines have to be preserved because of the editorial commentary
           which references the lines in the notebooks. if we omitted
           @unit = "line" referencing wouldn't work any longer :)
        case element(tei:milestone) return
            let $governing-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
            return
                if ($node/@unit = "handshift") then
                    (: a handshift survives only if it introduces a valid hand :)
                    if (simpleHelpers:is-hand-valid($tidySimple:valid-hands, $node)) then
                        tidySimple:copy-element($node, "post")
                    else
                        ()
                (: within an invalid hand only the line markers are kept :)
                else if ($governing-handshift
                and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $governing-handshift))
                and not($node/@unit = "line")) then
                    ()
                else
                    tidySimple:copy-element($node, "post")

        case element(tei:div) return
            (: even though it's posthumous we want to keep the text written on
               calendar pages by Friedrich Fontane. Unfortunately, Friedrich's
               handshift is oftentimes not the first hand appearing on the page
               but we want to keep the page nevertheless. :)
            if ($node/@type = ("Kalenderblatt", "clipping")) then
                tidySimple:copy-element($node, "post")
            else
                tidySimple:invalid-hands-default-return($node)

        default return
            tidySimple:invalid-hands-default-return($node)
};
(:~
 : Default decision of tidySimple:sort-out-invalid-hands for nodes that have
 : no special treatment.
 :
 : The node is kept (rebuilt via tidySimple:copy-element) if
 :   * its first child element is a handshift to a valid hand and no text
 :     precedes that handshift within the node, or
 :   * there is no preceding handshift, or the preceding handshift belongs to
 :     a valid hand.
 : Otherwise it is dropped.
 :
 : The "is the first child element the handshift?" checks compare node
 : identity (via intersect). A general comparison with "=" would compare the
 : atomized string values instead, so that ANY empty first child element would
 : be considered equal to an (always empty) handshift milestone.
 :
 : @param $node the node to decide on
 : @return the rebuilt node or the empty sequence
 :)
declare function tidySimple:invalid-hands-default-return($node as node()*)
as node()* {
    let $prev-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
    let $first-child-handshift := $node/child::tei:milestone[@unit = "handshift"][1]
    let $first-child-element := $node/child::*[1]
    let $first-child-node := $node/child::node()[1]
    return
        (: in some cases the valid handshift is the first child node
           instead of a previous node. of course we want to keep the element
           then :)
        if (exists($first-child-element intersect $first-child-handshift)
        (: ensure there's no text before the handshift :)
        and (normalize-space($first-child-node) = ""
            or exists($first-child-element intersect $first-child-node))
        and simpleHelpers:is-hand-valid($tidySimple:valid-hands, $first-child-handshift)) then
            tidySimple:copy-element($node, "post")

        else if ($prev-handshift
        and not(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $prev-handshift))) then
            ()

        else
            tidySimple:copy-element($node, "post")
};
(:~
 : Removes elements that aren't considered in the edited text:
 :
 : * handshifts for which simpleHelpers:is-prev-hand-same holds
 : * line markers within or directly after a tei:seg[@type = "missing-hyphen"]
 : * tei:head, tei:date, tei:rs, tei:note, tei:abbr, tei:list and tei:item
 :   without any child node
 : * tei:div[@type = "label"] without any child node
 :
 : Text nodes are returned as they are. Every other node is rebuilt as an
 : element in the TEI namespace with the same name and attributes, and its
 : children are processed recursively.
 :
 : @param $nodes the nodes to be processed
 : @return the remaining nodes
 :)
declare function tidySimple:sort-out-surplus-elements($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case text() return
            $node
        default return
            (: first decide whether the node is surplus ... :)
            let $is-surplus :=
                typeswitch ($node)
                case element(tei:milestone) return
                    if ($node/@unit = "handshift") then
                        simpleHelpers:is-prev-hand-same($node)
                    else
                        $node/@unit = "line"
                        and ($node/ancestor::tei:seg[@type = "missing-hyphen"]
                            or $node/preceding-sibling::*[1][self::tei:seg[@type = "missing-hyphen"]])
                case element(tei:head) return
                    empty($node/node())
                case element(tei:date) return
                    empty($node/node())
                case element(tei:rs) return
                    empty($node/node())
                case element(tei:note) return
                    empty($node/node())
                case element(tei:abbr) return
                    empty($node/node())
                case element(tei:list) return
                    empty($node/node())
                case element(tei:item) return
                    empty($node/node())
                case element(tei:div) return
                    $node/@type = "label"
                    and empty($node/node())
                default return
                    false()
            return
                (: ... then either drop it or rebuild it with processed children :)
                if ($is-surplus) then
                    ()
                else
                    element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                        $node/@*,
                        tidySimple:sort-out-surplus-elements($node/node())
                    }
};
(:~
 : Checks if any text has been written in the hand introduced by a milestone.
 :
 : The result is true if no further handshift follows $node, or if at least
 : one text node that is not whitespace-only lies between $node and the next
 : handshift.
 :
 : @param $node the current tei:milestone
 : @return true() if the hand has text (or is the last hand), false() otherwise
 :)
declare function tidySimple:has-hand-text($node as element(tei:milestone))
as xs:boolean {
    let $next-handshift := $node/following::tei:milestone[@unit = "handshift"][1]
    return
        empty($next-handshift)
        or (
            some $text in $node/following::text()[. << $next-handshift]
            satisfies normalize-space($text) != ""
        )
};
declare function tidySimple:copy-element($node as node(), $flag as xs:string)
{
element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
$node/@*,
if($flag = "post") then
tidySimple:sort-out-invalid-hands($node/node())
else if($flag = "surplus") then
tidySimple:sort-out-invalid-hands($node/node())
else if($flag = "hs-enhance") then
tidySimple:enhance-handshifts($node/node())
else if($flag = "sources") then
tidySimple:get-Fontanes-sources($node/node())
else if($flag = "summarize") then
tidySimple:summarize($node/node())
else if($flag = "summarize-headings") then
tidySimple:summarize-headings($node/node())
else if($flag = "summarize-notes") then
tidySimple:summarize-notes($node/node())
else if($flag = "summarize-hi") then
tidySimple:summarize-hi($node/node())
else if($flag = "ref") then
tidySimple:get-references-in-abstract($node/node())
else if($flag = "double-imgs") then
tidySimple:sort-double-imgs($node/node())
else if($flag = "tidy") then
tidySimple:tidy($node/node())
else
text{"!!!Kopieren des Elements fehlgeschlagen!!!"}
}