tidysimple.xqm 7.64 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
xquery version "3.1";

(:~
 : This module handles the conversion of the Fontane-TEI/XML into TEI simplePrint
 : for the edited text. The resulting TEI simplePrint is the basis for the "Edierter
 : Text" (edited text) view on the website and for the book. It represents the
 : latest layer of text.
 :
 : @author Michelle Rodzis
 : @version 0.1
 : @since TODO
 :)

module namespace tidySimple ="http://fontane-nb.dariah.eu/tidysimple";

MRodz's avatar
MRodz committed
16

17
declare namespace tei="http://www.tei-c.org/ns/1.0";
MRodz's avatar
MRodz committed
18
declare namespace test="http://exist-db.org/xquery/xqsuite";
19

MRodz's avatar
MRodz committed
20
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
21
22
23
import module namespace console="http://exist-db.org/xquery/console";
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";

MRodz's avatar
MRodz committed
24

25
(:~
 : The xml:id values (as strings) of every tei:handNote whose @script is
 : "contemporary", collected from all documents of the data collection below.
 :)
declare variable $tidySimple:valid-hands as xs:string* :=
    (: alternative collection kept for reference: collection("/db/apps/SADE/resources/xml") :)
    for $res in collection("/db/sade-projects/textgrid/data/xml/data")
    return
        $res//tei:handNote[@script = "contemporary"]/@xml:id/string();


MRodz's avatar
MRodz committed
32
(:~
 : Entry point of the conversion. Loads resources/xml/tei-simple-pre.xml below
 : $config:app-root, runs the tei:text elements through the three passes
 : sort-out-invalid-hands, sort-out-surplus-hands and make-structure, and stores
 : the result, wrapped in a div, as tei-simple-tmp.xml.
 :
 : @param $tei not used by the active code; only the commented-out follow-up
 :        steps at the end of the body refer to it
 : @return always the empty string
 :)
declare function tidySimple:main($tei as node()*) as xs:string* {
    let $source := $config:app-root || "/resources/xml/tei-simple-pre.xml"
    let $doc :=
        try {
            doc($source)
        } catch * {
            ()
        }
    (: Report the problem both when doc() raised an error and when it simply
       returned nothing, so a missing input file never goes unnoticed. :)
    let $log :=
        if (empty($doc)) then
            console:log("It was not possible to open the requested file tei-simple-pre.xml.")
        else
            ()
    let $text := $doc//tei:text
    let $clear-invalid-hands := tidySimple:sort-out-invalid-hands($text)
    let $clear-surplus-hands := tidySimple:sort-out-surplus-hands($clear-invalid-hands)
    let $text-with-sections := tidySimple:make-structure($clear-surplus-hands)
    let $store := xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple-tmp.xml", <div>{$text-with-sections}</div>)
    return ""
(:    let $doc := doc($config:app-root || "/resources/xml/tei-simple-tmp.xml"):)
(:    let $tidied-text := tidySimple:tidy($doc//tei:text):)
(:    let $final-tei:=:)
(:        <TEI xmlns="http://www.tei-c.org/ns/1.0">:)
(:            {$tei//tei:teiHeader}:)
(:            {$tidied-text}:)
(:        </TEI>:)
(::)
(:    return xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple.xml", $final-tei):)
};

56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
(:~
 : Recursively copies the given nodes and drops every tei:handShift for which
 : simpleHelpers:is-hand-valid (called with $tidySimple:valid-hands) is false.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes; elements are rebuilt via tidySimple:copy-element
 :         with the "post" flag, all non-element nodes are returned unchanged
 :)
declare function tidySimple:sort-out-invalid-hands($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case element(tei:handShift) return
            if (simpleHelpers:is-hand-valid($tidySimple:valid-hands, $node)) then
                tidySimple:copy-element($node, "post")
            else
                ()

        case element() return
            tidySimple:copy-element($node, "post")

        (: Text, comments and processing instructions have no element name to
           rebuild, so they are handed back as they are. :)
        default return
            $node
};


(:~
 : Recursively copies the given nodes and drops every tei:handShift for which
 : simpleHelpers:is-prev-hand-same is true.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes; kept tei:handShift elements go through
 :         tidySimple:copy-element with the "surplus" flag, other elements are
 :         rebuilt in the TEI namespace, non-element nodes are returned unchanged
 :)
declare function tidySimple:sort-out-surplus-hands($nodes as node()*)
as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case element(tei:handShift) return
            if (simpleHelpers:is-prev-hand-same($node)) then
                ()
            else
                tidySimple:copy-element($node, "surplus")

        case element() return
            element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                $node/@*,
                tidySimple:sort-out-surplus-hands($node/node())
            }

        (: Text, comments and processing instructions have no element name to
           rebuild, so they are handed back as they are. :)
        default return
            $node
};

MRodz's avatar
MRodz committed
94
95

(:~
 : Placeholder for the final tidying pass. It is not implemented yet: the input
 : is ignored.
 :
 : @param $nodes the nodes to tidy (currently ignored)
 : @return always the empty sequence
 :)
declare function tidySimple:tidy($nodes as node()*) as node()* {
    ()
};

(:~
 : Turns milestone markup into container elements. Section milestones become
 : tei:div (tidySimple:make-section), paragraph milestones become tei:p
 : (tidySimple:make-paragraph). Nodes that tidySimple:is-consumed-by-chunk
 : reports as belonging to such a chunk are dropped at their original position;
 : everything else is copied recursively.
 :
 : @param $nodes the nodes to process
 : @return the restructured nodes
 :)
declare function tidySimple:make-structure($nodes as node()*) as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case text() return
            if (tidySimple:is-consumed-by-chunk($node)) then
                ()
            else
                $node

        case element(tei:milestone) return
            if ($node/@unit = "section") then
                tidySimple:make-section($node)

            else if ($node/@unit = "paragraph") then
                tidySimple:make-paragraph($node)

            (: "and" binds tighter than "or": the span test is restricted to
               line milestones, the paragraph test applies to any milestone. :)
            else if (($node/@unit = "line" and tidySimple:is-inside-open-span($node))
                     or tidySimple:follows-paragraph-start($node)) then
                ()
            else
                tidySimple:copy-element($node, "structure")

        case element(tei:anchor) return
            (: An anchor that closes a preceding spanning milestone is dropped.
               IDs are compared exactly after stripping the leading "#". :)
            if ($node/@xml:id = ($node/preceding::tei:milestone/@spanTo ! substring-after(., "#"))) then
                ()
            else
                tidySimple:copy-element($node, "structure")

        case element() return
            if (tidySimple:is-consumed-by-chunk($node)) then
                ()
            else
                tidySimple:copy-element($node, "structure")

        (: Comments and processing instructions cannot be rebuilt as elements;
           they follow the same drop rule and are otherwise returned as is. :)
        default return
            if (tidySimple:is-consumed-by-chunk($node)) then
                ()
            else
                $node
};

(:~
 : True if a milestone carrying @spanTo precedes $node and a tei:anchor follows
 : $node whose xml:id is the target of a spanning milestone preceding that anchor.
 :)
declare %private function tidySimple:is-inside-open-span($node as node()) as xs:boolean {
    exists($node/preceding::tei:milestone/@spanTo)
    and exists(
        $node/following::tei:anchor[
            @xml:id = (preceding::tei:milestone/@spanTo ! substring-after(., "#"))
        ]
    )
};

(:~
 : True if any tei:milestone[@unit = "paragraph"] precedes $node.
 : NOTE(review): this test used to be written twice, joined by "and"; possibly
 : the second half was meant to look at a following paragraph milestone. Confirm.
 :)
declare %private function tidySimple:follows-paragraph-start($node as node()) as xs:boolean {
    exists($node/preceding::tei:milestone[@unit = "paragraph"])
};

(:~
 : True if $node lies inside an open span or after a paragraph milestone.
 :)
declare %private function tidySimple:is-consumed-by-chunk($node as node()) as xs:boolean {
    tidySimple:is-inside-open-span($node) or tidySimple:follows-paragraph-start($node)
};


144
(:~
 : Rebuilds an element in the TEI namespace with its attributes and lets the
 : pass selected by $flag process its children.
 :
 : @param $node the node to copy; anything that is not an element is returned
 :        unchanged because it has no name to build an element from
 : @param $flag selects the recursion: "structure" -> tidySimple:make-structure,
 :        "post" -> tidySimple:sort-out-invalid-hands,
 :        "surplus" -> tidySimple:sort-out-surplus-hands,
 :        any other value -> tidySimple:tidy
 : @return the rebuilt element, or $node itself for non-element nodes
 :)
declare function tidySimple:copy-element($node as node(), $flag as xs:string)
as node() {
    typeswitch ($node)
    case element() return
        element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
            $node/@*,
            switch ($flag)
                case "structure" return
                    tidySimple:make-structure($node/node())
                case "post" return
                    tidySimple:sort-out-invalid-hands($node/node())
                (: each pass has to recurse into itself, otherwise the children
                   would be processed by a different pass than their parent :)
                case "surplus" return
                    tidySimple:sort-out-surplus-hands($node/node())
                default return
                    tidySimple:tidy($node/node())
        }
    default return
        $node
};


(:~
 : Builds a fresh tei:handShift that carries only the @new, @script and @medium
 : attributes of the given one, leaving out any of them whose value is the
 : empty string.
 :
 : @param $node the tei:handShift to clean up
 : @return the new tei:handShift element
 :)
declare function tidySimple:clear-handshift($node as element(tei:handShift))
as element(tei:handShift) {
    let $kept := ($node/@new, $node/@script, $node/@medium)[not(. = "")]
    return
        element tei:handShift { $kept }
};
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201



(:~
 : Wraps the chunk delivered by tidySimple:get-section-chunk for the given
 : milestone in a tei:div with type="section".
 :
 : @param $node the tei:milestone the section starts at
 : @return the new tei:div element
 :)
declare function tidySimple:make-section($node as element(tei:milestone))
as element(tei:div) {
    let $content := tidySimple:get-section-chunk($node)
    return
        element tei:div {
            attribute type { "section" },
            $content
        }
};


(:~
 : Returns the following siblings of a spanning tei:milestone that come before
 : the tei:anchor its @spanTo points to.
 :
 : @author Michelle Rodzis
 : @param $node the current tei:milestone element
 : @return all following siblings of $node located before the first following
 :         tei:anchor whose xml:id equals the part of @spanTo after "#"; the
 :         empty sequence if there is no such anchor
 :)
declare function tidySimple:get-section-chunk($node as element(tei:milestone))
as node()* {
    let $target-id := substring-after($node/@spanTo, "#")
    (: Exact ID comparison, first hit only: a pattern match would also select
       anchors whose ID merely contains the target ID, and "<<" accepts no more
       than one node on either side. :)
    let $target := $node/following::tei:anchor[@xml:id = $target-id][1]
    return
        $node/following-sibling::node()[. << $target]
};


(:~
 : Wraps the chunk delivered by tidySimple:get-paragraph-chunk for the given
 : milestone in a tei:p.
 :
 : @param $node the tei:milestone the paragraph starts at
 : @return the new tei:p element
 :)
declare function tidySimple:make-paragraph($node as element(tei:milestone))
as element(tei:p) {
    let $content := tidySimple:get-paragraph-chunk($node)
    return
        element tei:p { $content }
};


(:~
 : Returns the following siblings of a tei:milestone[@unit = "paragraph"] up to
 : the next such milestone.
 :
 : @author Michelle Rodzis
 : @param $node the current tei:milestone element
 : @return all following siblings of $node located before the next
 :         tei:milestone[@unit = "paragraph"]; if no such milestone follows,
 :         all following siblings of $node
 :)
declare function tidySimple:get-paragraph-chunk($node as element(tei:milestone))
as node()* {
    let $next := $node/following::tei:milestone[@unit = "paragraph"][1]
    let $siblings := $node/following-sibling::node()
    return
        (: Without a following milestone "<<" would compare against the empty
           sequence and filter out every sibling, leaving the last paragraph
           empty. :)
        if (exists($next)) then
            $siblings[. << $next]
        else
            $siblings
};


(:~
 : Placeholder, not implemented yet. Raises an explicit error when called,
 : because an empty body cannot satisfy the declared xs:boolean return type.
 :
 : @param $node the tei:milestone to inspect (currently ignored)
 : @return never returns normally
 : @error tidySimple:not-implemented on every call
 :)
declare function tidySimple:is-second-part-of-pair($node as element(tei:milestone)) as xs:boolean {
    error(
        QName("http://fontane-nb.dariah.eu/tidysimple", "tidySimple:not-implemented"),
        "tidySimple:is-second-part-of-pair is not implemented yet."
    )
};