Due to maintenance reasons, gitlab.gwdg.de will not be available on Saturday 2021-07-24 from 10:30 to approximately 11:30 CEST.

Commit 9a5b057a authored by mrodzis's avatar mrodzis 🌎
Browse files

chore(tidysimple.xqm, ettransfo.xqm): add documentation, rename module, minor refactoring

parent b2e44299
......@@ -26,15 +26,15 @@ import module namespace fsort="http://fontane-nb.dariah.eu/sort" at "sort.xqm";
import module namespace functx = "http://www.functx.com";
import module namespace prepCom="http://fontane-nb.dariah.eu/prepCom" at "prepcom.xqm";
import module namespace simple2xhtml="http://fontane-nb.dariah.eu/simple2xhtml" at "simple2xhtml.xqm";
import module namespace tidySimple ="http://fontane-nb.dariah.eu/tidysimple" at "tidysimple.xqm";
import module namespace tidy ="http://fontane-nb.dariah.eu/tidy" at "tidy.xqm";
declare variable $etTransfo:cases :=
(
"3qtcz.xml", (: case C :)
"3qtqv.xml", (: case A :)
"3qtqw.xml", (: case B :)
"3qtqx.xml", (: case D :)
"3qtqz.xml" (: case E :)
(: "3qtcz.xml", (: case C :):)
(: "3qtqv.xml", (: case A :):)
(: "3qtqw.xml", (: case B :):)
"3qtqx.xml" (: case D :)
(: "3qtqz.xml" (: case E :):)
);
declare variable $etTransfo:coll := "/db/apps/SADE/modules/fontane/edited-text/";
......@@ -231,7 +231,7 @@ declare function etTransfo:transform-tei($tei as element(tei:TEI), $log as xs:st
let $tidy-interform :=
try {
tidySimple:main($transform-to-interform, $tei/@id)
tidy:main($transform-to-interform, $tei/@id)
} catch * {
etTransfo:add-log-entry($log, "ETTRANSFO12: Error while tidying up the intermediate format for this notebook. Reason: 
" ||
concat("[", $err:line-number, ": ", $err:column-number, "] Error ", $err:code, ": ", $err:description))
......
xquery version "3.1";
(:~
: This module handles the conversion of the Fontane-TEI/XML into a TEI subset
: for the edited text. The resulting TEI is the basis for the "Edierter
: Text" (edited text) view on the website and the book. It represents the latest
: layer of text.
:
: Its main purpose is to tidy up the intermediate TEI that has been created by
: tei2teisimple.
:
: @author Michelle Weidling
: @version 0.1
: @since 0.0.0
:)
module namespace tidy ="http://fontane-nb.dariah.eu/tidy";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
import module namespace functx="http://www.functx.com";
import module namespace index-info="http://fontane-nb.dariah.eu/index-info" at "index-info.xqm";
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
(: The @xml:id values of every tei:handNote with @script = "contemporary"
 : found in the "/data" collection below $config:data-root. The list is handed
 : to simpleHelpers:is-hand-valid to decide which hands stay in the edited text.
 : NOTE(review): only "contemporary" hand notes are selected here; if selected
 : posthumous hands are supposed to count as valid as well, that has to happen
 : somewhere else - confirm. :)
declare variable $tidy:valid-hands :=
    collection($config:data-root || "/data")
        ! .//tei:handNote[@script = "contemporary"]/@xml:id/string();
(:~
 : Entry point of the module. Runs the intermediate TEI through all tidying
 : passes, enriches the TEI header, wraps both in a new tei:TEI element and
 : stores that element below $config:data-root || "/print/xml/".
 :
 : @param $tei the intermediate TEI
 : @param $uri base name of the stored resource (".xml" is appended)
 : @return the newly assembled tei:TEI element
 :)
declare function tidy:main($tei as node()*, $uri as xs:string) {
    (: every pass receives the result of the previous one :)
    let $tidied-text :=
        tidy:enhance-handshifts($tei)
            => tidy:sort-out-surplus-elements()
            => tidy:sort-out-invalid-hands()
            => tidy:split-headings()
            => tidy:summarize()
            => tidy:summarize-headings()
            => tidy:summarize-notes()
            => tidy:summarize-hi()
            => tidy:sort-double-imgs()
            => tidy:tidy()
    let $enriched-header :=
        tidy:get-Fontanes-sources($tei//tei:teiHeader[parent::tei:TEI])
            => tidy:get-references-in-abstract()
    (: tei:TEI/@id is always something like 'Notizbuch A1'; the second token
       (the shelf number) is split into the two sort keys :)
    let $notebook-id := $tei//tei:TEI/@id
    let $shelf-number := tokenize($notebook-id, " ")[2]
    let $key1 := substring($shelf-number, 1, 1)
    let $key2 := substring($shelf-number, 2)
    let $final-tei := <TEI xmlns="http://www.tei-c.org/ns/1.0" id="{$notebook-id}" key1="{$key1}" key2="{$key2}">{$enriched-header}{$tidied-text//tei:text}</TEI>
    (: bound only for the side effect of writing the document to the database :)
    let $stored := xmldb:store($config:data-root || "/print/xml/", $uri || ".xml", $final-tei)
    return
        $final-tei
};
(:~
 : Filters the given nodes by the hand they were written in. Before this pass
 : all encoded hands and their texts are still present.
 :
 : For every node the nearest preceding handshift milestone is looked up and
 : checked with simpleHelpers:is-hand-valid against $tidy:valid-hands:
 :
 : * text nodes following an invalid handshift are dropped
 : * handshift milestones are kept only if their own hand is valid
 : * line milestones are always kept
 : * other milestones following an invalid handshift are dropped
 : * tei:div of @type "Kalenderblatt" or "clipping" is always kept
 : * everything else is decided by tidy:invalid-hands-default-return
 :
 : Kept elements are rebuilt with tidy:construct-element, which applies this
 : function to their child nodes.
 :
 : @param $nodes the nodes to filter
 : @return the nodes that remain
 :)
declare function tidy:sort-out-invalid-hands($nodes as node()*)
as node()* {
    for $current in $nodes
    let $last-handshift := $current/preceding::tei:milestone[@unit = "handshift"][1]
    let $last-hand-invalid := not(simpleHelpers:is-hand-valid($tidy:valid-hands, $last-handshift))
    (: true when there is a preceding handshift and its hand is not valid :)
    let $in-invalid-hand := exists($last-handshift) and $last-hand-invalid
    return
        typeswitch ($current)
            case text() return
                if ($in-invalid-hand) then
                    ()
                else
                    $current

            case element(tei:milestone) return
                if ($current/@unit = "handshift") then
                    (: a handshift is judged by its own hand, not the previous one :)
                    if (simpleHelpers:is-hand-valid($tidy:valid-hands, $current)) then
                        tidy:construct-element($current, "post")
                    else
                        ()
                (: all lines have to be preserved because the editorial
                   commentary references the lines in the notebooks; omitting
                   @unit = "line" would break that referencing :)
                else if ($in-invalid-hand and not($current/@unit = "line")) then
                    ()
                else
                    tidy:construct-element($current, "post")

            case element(tei:div) return
                (: although posthumous, the text Friedrich Fontane wrote on
                   calendar pages is kept. His handshift is often not the first
                   hand on the page, so the whole page is kept regardless. :)
                if ($current/@type = ("Kalenderblatt", "clipping")) then
                    tidy:construct-element($current, "post")
                else
                    tidy:invalid-hands-default-return($current)

            default return
                tidy:invalid-hands-default-return($current)
};
(:~
 : Default decision of tidy:sort-out-invalid-hands for a node without special
 : handling. The node is rebuilt with tidy:construct-element unless it follows
 : an invalid handshift and does not itself open with a valid one.
 :
 : @param $node the node to decide on
 : @return the rebuilt node, or the empty sequence if it is dropped
 :)
declare function tidy:invalid-hands-default-return($node as node()*)
as node()* {
    let $last-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
    let $own-handshift := $node/child::tei:milestone[@unit = "handshift"][1]
    let $leading-element := $node/child::*[1]
    let $leading-node := $node/child::node()[1]
    (: in some cases the valid handshift is the first child instead of a
       preceding node; the element is kept then.
       NOTE(review): "=" compares the atomized string values of the nodes, not
       their identity, so any first child element with an empty string value
       also matches the handshift - confirm whether "is" was intended. :)
    let $opens-with-valid-hand :=
        $leading-element = $own-handshift
        (: ensure there's no text before the handshift :)
        and (normalize-space($leading-node) = ""
            or $leading-element = $leading-node)
        and simpleHelpers:is-hand-valid($tidy:valid-hands, $own-handshift)
    return
        if (not($opens-with-valid-hand)
            and exists($last-handshift)
            and not(simpleHelpers:is-hand-valid($tidy:valid-hands, $last-handshift))) then
            ()
        else
            tidy:construct-element($node, "post")
};
(:~
 : Drops nodes that are not considered in the edited text:
 :
 : * handshift milestones for which simpleHelpers:is-prev-hand-same is true
 : * line milestones inside a tei:seg[@type = "missing-hyphen"] or directly
 :   after one (it is the immediately preceding sibling element)
 : * tei:abbr and tei:div[@type = "label"] without any child nodes
 : * tei:head, tei:date, tei:rs, tei:note, tei:list and tei:item as decided by
 :   tidy:surplus-elements-default-return
 :
 : Text nodes are passed through. Every other node is rebuilt with
 : tidy:construct-element, which applies this function to its child nodes.
 :
 : @param $nodes the nodes to process
 : @return the nodes that remain
 :)
declare function tidy:sort-out-surplus-elements($nodes as node()*)
as node()* {
    for $current in $nodes
    return
        typeswitch ($current)
            case text() return
                $current

            case element(tei:milestone) return
                let $unit := $current/@unit
                let $is-surplus :=
                    if ($unit = "handshift") then
                        simpleHelpers:is-prev-hand-same($current)
                    else
                        $unit = "line"
                        and ($current/ancestor::tei:seg[@type = "missing-hyphen"]
                            or $current/preceding-sibling::*[1][self::tei:seg[@type = "missing-hyphen"]])
                return
                    if ($is-surplus) then
                        ()
                    else
                        tidy:construct-element($current, "surplus")

            (: these elements all share the same default handling :)
            case element(tei:head) | element(tei:date) | element(tei:rs)
                | element(tei:note) | element(tei:list) | element(tei:item) return
                tidy:surplus-elements-default-return($current)

            case element(tei:abbr) return
                if (empty($current/node())) then
                    ()
                else
                    tidy:construct-element($current, "surplus")

            case element(tei:div) return
                (: only label divs are removed when they have no content :)
                if ($current/@type = "label" and empty($current/node())) then
                    ()
                else
                    tidy:construct-element($current, "surplus")

            default return
                tidy:construct-element($current, "surplus")
};
(:~
 : Default handling for elements that may have lost their content in the
 : sorting process: a node without any child nodes is dropped, every other
 : node is rebuilt with tidy:construct-element using the "surplus" flag.
 :
 : The return type is element()? because the empty sequence is a regular
 : result; with a mandatory element() the "drop" branch would violate the
 : declared type and raise a type error.
 :
 : @param $node the node to check
 : @return the rebuilt element, or the empty sequence if $node has no content
 :)
declare function tidy:surplus-elements-default-return($node as node())
as element()? {
    (: child::node() already includes child elements, so one test suffices :)
    if (empty($node/node())) then
        ()
    else
        tidy:construct-element($node, "surplus")
};
(:~
 : Tells whether the given milestone is followed by text before the next
 : handshift milestone.
 :
 : @param $node the milestone to start from
 : @return true() if no handshift milestone follows at all, or if at least one
 :         text node that is not whitespace-only lies between $node and the
 :         next handshift milestone; false() otherwise
 :)
declare function tidy:has-hand-text($node as element(tei:milestone))
as xs:boolean {
    let $next-handshift := $node/following::tei:milestone[@unit = "handshift"][1]
    return
        if (empty($next-handshift)) then
            true()
        else
            some $between in $node/following::node()[. << $next-handshift]
            satisfies ($between instance of text()
                and not(normalize-space($between) = ""))
};
(:~
 : A constructor. Creates an element in the TEI namespace with the name and
 : the attributes of the current node; its content is produced by applying the
 : processing function selected by $flag to the node's child nodes.
 :
 : @param $node The current node
 : @param $flag Selects the function that processes the child nodes. An unknown
 :              flag yields a text node with an error marker as content.
 :)
declare function tidy:construct-element($node as node(), $flag as xs:string)
{
    let $children := $node/node()
    let $content :=
        switch ($flag)
            case "post" return tidy:sort-out-invalid-hands($children)
            case "surplus" return tidy:sort-out-surplus-elements($children)
            case "hs-enhance" return tidy:enhance-handshifts($children)
            case "sources" return tidy:get-Fontanes-sources($children)
            case "summarize" return tidy:summarize($children)
            case "summarize-headings" return tidy:summarize-headings($children)
            case "summarize-notes" return tidy:summarize-notes($children)
            case "summarize-hi" return tidy:summarize-hi($children)
            case "ref" return tidy:get-references-in-abstract($children)
            case "double-imgs" return tidy:sort-double-imgs($children)
            case "tidy" return tidy:tidy($children)
            default return text {"!!!Kopieren des Elements fehlgeschlagen!!!"}
    return
        element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
            $node/@*,
            $content
        }
};
(:~
 : Purges surplus attributes from tei:milestone[@unit = "handshift"]: empty
 : @subtype and @rend attributes are removed, all other attributes are kept
 : and @unit is set to "handshift".
 :
 : @author Michelle Weidling
 : @param $node the current tei:milestone[@unit = "handshift"]
 : @return the purged tei:milestone[@unit = "handshift"]
 :)
declare function tidy:clear-handshift($node as element(tei:milestone))
as element(tei:milestone) {
    element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
        attribute unit {"handshift"},
        (: @unit is excluded from the copy: it is constructed explicitly above
           and a second attribute of the same name is an error (XQDY0025) :)
        $node/(@* except (@unit, @subtype, @rend)),
        (: @subtype and @rend are only carried over when they have a value :)
        $node/@subtype[. != ""],
        $node/@rend[. != ""]
    }
};
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment