Commit 964d6d9a authored by Michelle Rodzis, committed by mrodzis
Browse files

Add new first draft for handShifts and structure

parent b1d4f873
......@@ -12,7 +12,7 @@ import module namespace fontane-simple="http://fontane-nb.dariah.eu/teisimple"
at "tei2teisimple.xqm";
import module namespace tidySimple ="http://fontane-nb.dariah.eu/tidysimple" at "tidysimple.xqm";
(:let $tei := fontane-simple:main("16b00.xml"):)
(:return tidySimple:main($tei):)
let $tei := fontane-simple:main("16b00.xml")
return tidySimple:main($tei)
fontane-simple:main("16b00.xml")
\ No newline at end of file
(:fontane-simple:main("16b00.xml"):)
\ No newline at end of file
......@@ -12,40 +12,43 @@ declare namespace test="http://exist-db.org/xquery/xqsuite";
(: handshifts :)
declare
%test:name("handShift - general")
%test:name("handShift - Sort out invalid ones")
%test:args("<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Stempel2""/>")
%test:assertEmpty
%test:args("<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/>")
%test:assertEquals("<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/>")
function tidysimple-test:handShift($node as element(*)) {
tidySimple:tidy($node)
function tidysimple-test:handShift-invalid($node as element(*)) {
tidySimple:sort-out-invalid-hands($node)
};
declare
%test:name("handShift - remove duplicates")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf"" medium=""""/><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf"" medium=""""/></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/></body>")
%test:name("handShift - Sort out surplus ones")
%test:args("<div xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/>Some text<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn""/></div>")
%test:assertEquals("<div xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/>Some text<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn""/></div>")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf"" medium=""""/><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn"" medium=""""/></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latf""/><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn""/></body>")
%test:args("<div xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" medium="""" script=""Latn""/>Some text<handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" medium="""" script=""Latn""/></div>")
%test:assertEquals("<div xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" medium="""" new=""#Fontane"" script=""Latn""/>Some text</div>")
function tidysimple-test:handShift-remove-duplicates($node as element(*)) {
tidySimple:tidy($node)
function tidysimple-test:handShift-surplus($node as element(*)) {
tidySimple:sort-out-surplus-hands($node)
};
(: Sections :)
(: only apply valid elements, i.e. elements that belong to a valid hand :)
declare
%test:name("only apply valid elements")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Friedrich_Fontane"" medium=""black_ink"" script=""Latn clean""/><tei:seg xmlns:tei=""http://www.tei-c.org/ns/1.0"" rendition=""font-size:small"">Dieses Buch hat 52 Blatt.</tei:seg></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""/>")
%test:name("Sections")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""section"" type=""Text_2"" spanTo=""#C07_4r_s""/><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn clean"" medium=""""/><head xmlns=""http://www.tei-c.org/ns/1.0"" type=""x-large"" rend=""align(center)""><seg xmlns=""http://www.tei-c.org/ns/1.0"" rendition=""font-size:x-large; letter-spacing:0.2cm""><rs type=""direct"" ref=""psn:Cranach""><hi xmlns=""http://www.tei-c.org/ns/1.0"">Lucas Cranach</hi></rs>.</seg></head><handShift xmlns=""http://www.tei-c.org/ns/1.0"" script=""Latf standard"" new=""#Fontane"" medium=""""/><anchor xmlns=""http://www.tei-c.org/ns/1.0"" xml:id=""C07_4r_s""/></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><div xmlns=""http://www.tei-c.org/ns/1.0"" type=""section""><handShift xmlns=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn clean"" medium=""""/><head xmlns=""http://www.tei-c.org/ns/1.0"" type=""x-large"" rend=""align(center)""><seg xmlns=""http://www.tei-c.org/ns/1.0"" rendition=""font-size:x-large; letter-spacing:0.2cm""><rs type=""direct"" ref=""psn:Cranach""><hi xmlns=""http://www.tei-c.org/ns/1.0"">Lucas Cranach</hi></rs>.</seg></head><handShift xmlns=""http://www.tei-c.org/ns/1.0"" script=""Latf standard"" new=""#Fontane"" medium=""""/></div></body>")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><tei:handShift xmlns:tei=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn clean""/><tei:head xmlns:tei=""http://www.tei-c.org/ns/1.0"" type=""x-large""><rs xmlns:tei=""http://www.tei-c.org/ns/1.0"" type=""direct"" ref=""psn:Luther"">Luther</rs></tei:head></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><tei:handShift xmlns:tei=""http://www.tei-c.org/ns/1.0"" new=""#Fontane"" script=""Latn clean""/><tei:head xmlns:tei=""http://www.tei-c.org/ns/1.0"" type=""x-large""><rs xmlns:tei=""http://www.tei-c.org/ns/1.0"" type=""direct"" ref=""psn:Luther"">Luther</rs></tei:head></body>")
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line""/><rs xmlns=""http://www.tei-c.org/ns/1.0"" type=""direct"" ref=""psn:Amalberga""><hi xmlns=""http://www.tei-c.org/ns/1.0"" xml:id=""C07_7r_d"" prev=""#C07_7r_c"">Amalberga</hi></rs></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line""/><rs xmlns=""http://www.tei-c.org/ns/1.0"" type=""direct"" ref=""psn:Amalberga""><hi xmlns=""http://www.tei-c.org/ns/1.0"" xml:id=""C07_7r_d"" prev=""#C07_7r_c"">Amalberga</hi></rs></body>")
function tidysimple-test:valid-elements($node as element(*)) {
tidySimple:tidy($node)
};
\ No newline at end of file
%test:args("<body xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line"" rendition=""indent""/>Dies dauerte <date xmlns=""http://www.tei-c.org/ns/1.0"" from=""1125"" to=""1425"" type=""asynchronous"">300 Jahr</date>.<milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""paragraph""/></body>")
%test:assertEquals("<body xmlns=""http://www.tei-c.org/ns/1.0""><p xmlns=""http://www.tei-c.org/ns/1.0""><milestone xmlns=""http://www.tei-c.org/ns/1.0"" unit=""line"" rendition=""indent""/>Dies dauerte <date xmlns=""http://www.tei-c.org/ns/1.0"" from=""1125"" to=""1425"" type=""asynchronous"">300 Jahr</date>.</p></body>")
function tidysimple-test:sections($node as element(*)) {
tidySimple:make-structure($node)
};
......@@ -23,8 +23,8 @@ import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelp
(: Module variable holding the string values of the xml:id attributes of all
   tei:handNote elements whose script attribute equals "contemporary", taken
   from the resources of the queried collection.
   NOTE(review): this listing comes from a rendered diff, so the two
   collection paths below (one active, one commented out, in both orders)
   presumably are the old and the new side of the same statement; confirm
   which path is the intended one. :)
declare variable $tidySimple:valid-hands :=
(: for $res in collection("/db/sade-projects/textgrid/data/xml/data"):)
for $res in collection("/db/apps/SADE/resources/xml")
for $res in collection("/db/sade-projects/textgrid/data/xml/data")
(: for $res in collection("/db/apps/SADE/resources/xml"):)
return
$res//tei:handNote[@script = "contemporary"]/@xml:id/string();
......@@ -85,44 +85,56 @@ as node()* {
tidySimple:copy-element($node, "surplus")
default return
tidySimple:copy-element($node, "surplus")
element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
$node/@*,
tidySimple:sort-out-surplus-hands($node/node())
}
};
(: Processes each of the given nodes in turn and dispatches on the node kind.
   $nodes  the nodes to process
   returns the nodes that were kept, possibly an empty sequence
   NOTE(review): this listing comes from a rendered diff; the typeswitch and
   the lone empty sequence before the closing brace presumably are the old and
   the new body of the function. Confirm which one is current. :)
declare function tidySimple:tidy($nodes as node()*) as node()* {
for $node in $nodes return
typeswitch ($node)
(: text nodes are returned unchanged :)
case text() return
$node
(: a handShift is handed to clear-handshift only when is-hand-valid holds and
   is-prev-valid-hand-same does not; otherwise it is dropped :)
case element(tei:handShift) return
if(simpleHelpers:is-hand-valid($tidySimple:valid-hands, $node)
and not(simpleHelpers:is-prev-valid-hand-same($tidySimple:valid-hands, $node))) then
tidySimple:clear-handshift($node)
else
()
(: every other node is copied with the "tidy" flag when belongs-to-valid-hand
   holds; otherwise it is dropped :)
default return
if(simpleHelpers:belongs-to-valid-hand($tidySimple:valid-hands, $node)) then
tidySimple:copy-element($node, "tidy")
else
()
()
};
declare function tidySimple:make-structure($nodes as node()*) as node()* {
for $node in $nodes return
typeswitch ($node)
case text() return
$node
if ($node/preceding::tei:milestone/@spanTo
and $node/following::tei:anchor[matches(preceding::tei:milestone/@spanTo, @xml:id)]
or $node/preceding::tei:milestone[@unit = "paragraph"]
and $node/preceding::tei:milestone[@unit = "paragraph"]) then
()
else
$node
case element(tei:milestone) return
case element(tei:milestone) return
if($node/@unit = "section") then
tidySimple:make-section($node)
else
else if($node/@unit = "paragraph") then
tidySimple:make-paragraph($node)
default return
if($node[preceding::tei:milestone]) then
else if($node/@unit = "line"
and $node/preceding::tei:milestone/@spanTo
and $node/following::tei:anchor[matches(preceding::tei:milestone/@spanTo, @xml:id)]
or $node/preceding::tei:milestone[@unit = "paragraph"]
and $node/preceding::tei:milestone[@unit = "paragraph"]) then
()
else
tidySimple:copy-element($node, "structure")
case element(tei:anchor) return
if (matches($node/preceding::tei:milestone/@spanTo, $node/@xml:id)) then
()
else
tidySimple:copy-element($node, "structure")
default return
if($node/preceding::tei:milestone/@spanTo
and $node/following::tei:anchor[matches(preceding::tei:milestone/@spanTo, @xml:id)]
or $node/preceding::tei:milestone[@unit = "paragraph"]
and $node/preceding::tei:milestone[@unit = "paragraph"]) then
()
else
tidySimple:copy-element($node, "structure")
......@@ -130,7 +142,7 @@ declare function tidySimple:make-structure($nodes as node()*) as node()* {
declare function tidySimple:copy-element($node as node(), $flag as xs:string)
as node() {
{
element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
$node/@*,
if($flag = "structure") then
......@@ -187,7 +199,7 @@ as node()* {
let $target-id := substring-after($node/@spanTo, "#")
let $target := $node/following::tei:anchor[matches(@xml:id, $target-id)]
return
$node/following::*[. << $target]
$node/following-sibling::node()[. << $target]
};
......@@ -209,7 +221,12 @@ as element(tei:p) {
:)
(: Selects the content that comes after the given milestone and before a
   following milestone with unit="paragraph".
   NOTE(review): this listing comes from a rendered diff; the two "let $target"
   lines and the two result expressions presumably are the old and the new
   variant of the same statement. Confirm which one is current. :)
declare function tidySimple:get-paragraph-chunk($node as element(tei:milestone))
as node()* {
let $target := $node/following::tei:milestone[@unit = "paragraph"]
let $target := $node/following::tei:milestone[@unit = "paragraph"][1]
return
$node/following::*[. << $target]
(: NOTE(review): the console:log call below looks like leftover debug output
   inside the result sequence; confirm before it is merged :)
($node/following-sibling::node()[. << $target], console:log($node/following-sibling::*[. << $target]))
};
(:~
 : Placeholder for a check on a tei:milestone element. Judging by the name it
 : is meant to tell whether the milestone is the second one of a pair; that
 : logic is not implemented yet (TODO).
 :
 : The previous body was the empty sequence, which does not match the declared
 : return type xs:boolean, so every call failed return-type checking. Until
 : the real check exists the function answers false(), i.e. no milestone is
 : treated as the second part of a pair.
 :
 : @param $node the milestone to inspect (currently not evaluated)
 : @return always false()
 :)
declare function tidySimple:is-second-part-of-pair($node as element(tei:milestone)) as xs:boolean {
    false()
};
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment