presort.xqm 9.75 KB
Newer Older
1
2
3
4
5
6
7
8
xquery version "3.1";

module namespace presort="http://fontane-nb.dariah.eu/presort";

declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";

import module namespace console="http://exist-db.org/xquery/console";
9
import module namespace functx="http://www.functx.com";
10
11
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";

12
(:~
 : Entry point of the presort step.
 :
 : Runs the three passes in order (prepare, sort, sort-integrations), stores
 : the result as "tei-simple-presort.xml" in /db/apps/SADE/resources/xml/ and
 : returns it.
 :
 : An earlier variant that sorted the contents of tei:front, tei:body and
 : tei:back separately and re-wrapped them in a new tei:text has been retired
 : in favour of processing the passed nodes as a whole.
 :
 : @param $tei the node(s) to process
 : @return the fully sorted result (declared as a single tei:text element)
 :)
declare function presort:main($tei as node()*) as element(tei:text) {
    let $sorted := presort:sort(presort:prepare($tei))
    let $fully-sorted := presort:sort-integrations($sorted)
    (: bound only for its side effect: persist the result in the database :)
    let $store := xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple-presort.xml", $fully-sorted)
    return
        $fully-sorted
};

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
(:~
 : First pass: copies the input into the TEI namespace, drops comments and
 : marks elements that lie inside an interlinear tei:addSpan range.
 :
 : Text nodes are returned unchanged and comments are removed. For every other
 : node the nearest preceding sibling tei:addSpan is looked up; if it has
 : place="interlinear" and carries @prev or @next (and the current node is not
 : a tei:anchor), the node is copied without recursing into its children, and
 : it receives type="interlinear" when it is one of the siblings between the
 : addSpan and the tei:anchor referenced by its @spanTo. All other nodes are
 : rebuilt recursively.
 :
 : Membership in the span is decided by node identity. A structural
 : (deep-equal) comparison would also flag siblings outside the span that
 : merely look the same as a spanned node, e.g. a second identical element
 : after the closing anchor.
 :
 : @param $nodes the nodes to prepare
 : @return the prepared copies, in input order
 :)
declare function presort:prepare($nodes as node()*) as node()* {
    for $node in $nodes
      return
        typeswitch ($node)
        case text() return 
            $node
            
        case comment() return
            ()
        
        default return
            let $addSpan := $node/preceding-sibling::*[self::tei:addSpan][1][@place = 'interlinear'][@prev or @next]
            return
            if($addSpan and not($node[self::tei:anchor])) then
                let $spanTo := substring-after($addSpan/@spanTo, "#")
                let $anchor := $addSpan/following::tei:anchor[@xml:id = $spanTo]
                (: siblings of the addSpan that precede its closing anchor :)
                let $nodes-inbetween := $addSpan/following-sibling::*[. << $anchor]
                
                return
                    element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                        $node/@*,
                        (: identity, not deep equality: only the spanned nodes themselves are marked :)
                        if(some $spanned in $nodes-inbetween satisfies $spanned is $node) then
                            attribute type {"interlinear"}
                        else
                            (),
                        $node/node()
                    }
            else
                element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                    $node/@*,
                    presort:prepare($node/node())
                }
        
};

69
70
71
72
(:~
 : Second pass: brings elements that are linked through @prev / @next into
 : reading order.
 :
 : Text nodes are returned unchanged and comments are dropped. tei:rs and
 : tei:hi get special treatment (see the inline comments); every other node is
 : dropped when it carries type="interlinear" and is otherwise handed to
 : presort:default-return.
 :
 : @param $nodes the nodes to sort
 : @return the re-ordered nodes
 :)
declare function presort:sort($nodes as node()*) as node()* {
    for $node in $nodes
    return
        typeswitch ($node)
        case text() return
            $node

        case comment() return
            ()

        case element(tei:rs) return
            (: text-only tei:rs with @next: emit it together with its
               continuation if that one is text-only as well :)
            if($node[@next and not(child::*)]) then
                let $next-node := presort:find-corresp-node($node, "next")
                return
                    if($next-node[not(child::*)]) then
                        ($node, $next-node)
                    else
                        presort:exclude-copied($node)
            (: text-only tei:rs with @prev: nothing is emitted here when its
               predecessor is text-only too, since that predecessor emits
               both parts in the branch above :)
            else if($node[@prev and not(child::*)]) then
                let $prev-node := presort:find-corresp-node($node, "prev")
                return
                    if($prev-node[not(child::*)]) then
                        ()
                    else
                        presort:exclude-copied($node)
            (: for nodes that only have a tei:hi that is part of a virtual
               aggregation. these nodes will be processed in tei:hi.
               The name test needs the tei: prefix: this module declares no
               default element namespace, so an unprefixed "hi" would only
               match elements in no namespace and never a TEI element. :)
            else if($node[child::tei:hi[@prev] and count(child::*) = 1]) then
                presort:sort($node/node())
            else
                presort:exclude-copied($node)

        case element(tei:hi) return
            if($node/@next and not($node/ancestor::tei:rs)) then
                let $corresp-node := presort:find-corresp-node($node, "next")
                return
                    (: the continuation is a tei:hi inside a tei:rs: emit this
                       node followed by a new tei:rs that carries the
                       attributes of that parent and wraps the continuation :)
                    if($corresp-node[self::tei:hi and parent::tei:rs]) then
                        (presort:keep-node($node),
                        element {QName("http://www.tei-c.org/ns/1.0", "rs")} {
                            $corresp-node/parent::*/@*,
                            $corresp-node
                        })
                    else
                        presort:default-return($node)
            else
                presort:default-return($node)

        default return
            (: nodes carrying type="interlinear" are dropped at their original position :)
            if($node/@type = "interlinear") then
                ()
            else
                presort:default-return($node)
};

(:~
 : Rebuilds each passed node as an element in the TEI namespace, keeping all
 : of its attributes (including @prev and @next) and sorting its children with
 : presort:sort.
 :
 : The signature accepts any number of nodes, so the input is iterated: an
 : empty sequence (for instance the result of a pointer that resolves to
 : nothing) yields the empty sequence instead of failing in QName(), and
 : several nodes are rebuilt one by one.
 :
 : @param $node the node(s) to rebuild
 : @return one rebuilt element per input node
 :)
declare function presort:keep-node($node as node()*) as node()* {
    for $current in $node
    return
        element {QName("http://www.tei-c.org/ns/1.0", $current/name())} {
            $current/@*,
            presort:sort($current/node())
        }
};

134
135

(:~
 : Emits a node followed by everything that is reachable from it via @next.
 :
 : For a tei:addSpan the following siblings that precede the tei:anchor
 : referenced by @spanTo are emitted through presort:keep-node; the @next of
 : the addSpan itself is not followed. For any other node the node is emitted
 : and, as long as it has @next, the function recurses into the node that
 : pointer resolves to.
 :
 : NOTE(review): there is no guard against cyclic @next chains.
 :
 : @param $node the node to start from
 : @return the node and its chained successors, in chain order
 :)
declare function presort:apply-all-nexts($node as node()*) {
    if ($node[self::tei:addSpan]) then
        let $end-id := substring-after($node/@spanTo, "#")
        let $end-anchor := $node/following::tei:anchor[@xml:id = $end-id]
        for $covered in $node/following-sibling::*[. << $end-anchor]
        return
            presort:keep-node($covered)
    else if ($node/@next) then
        (: head or middle of a chain: emit, then continue with the successor :)
        (
            presort:keep-node($node),
            presort:apply-all-nexts(presort:find-corresp-node($node, "next"))
        )
    else
        (: end of the chain :)
        presort:keep-node($node)
};


(:~
 : Returns the value of the node's @next attribute with every "#" removed.
 :
 : @param $node the node whose @next is read
 : @return the pointer without "#"; the zero-length string if there is no @next
 :)
declare function presort:get-next-id($node as node()) {
    replace($node/@next, "#", "")
};

(:~
 : Returns the value of the node's @prev attribute with every "#" removed.
 :
 : @param $node the node whose @prev is read
 : @return the pointer without "#"; the zero-length string if there is no @prev
 :)
declare function presort:get-prev-id($node as node()) {
    replace($node/@prev, "#", "")
};

174
175
176
177
178
179
180
181
182
183
(:~
 : Resolves the @next or @prev pointer of a node.
 :
 : The lookup starts at the outermost ancestor element of $node and returns
 : every descendant of it whose @xml:id equals the pointer value.
 :
 : @param $node the node holding the pointer
 : @param $flag "next" to follow @next, "prev" to follow @prev
 : @return the matching element(s); empty if nothing matches
 : @error PRESORT1 (namespace "FONTANE") if $flag is neither "next" nor "prev"
 :)
declare function presort:find-corresp-node($node as node()*, $flag as xs:string) {
    let $target-id :=
        switch ($flag)
        case "next" return presort:get-next-id($node)
        case "prev" return presort:get-prev-id($node)
        default return error(QName("FONTANE", "PRESORT1"), "Invalid flag: " || $flag || "." )
    let $outermost := $node/ancestor::*[last()]
    return
        $outermost//*[@xml:id = $target-id]
};

mrodzis's avatar
mrodzis committed
186
(:~
 : Rebuilds an element while resolving the @prev / @next chains of its
 : children.
 :
 : For each child:
 :   - if it has @next and its @style does not match "underline", it is
 :     replaced by the result of presort:apply-all-nexts;
 :   - otherwise, if it has @prev, it is left out;
 :   - otherwise it is kept unchanged.
 :
 : @param $node the element to rebuild (callers in this module pass a tei:rs)
 : @return the rebuilt element, or the empty sequence if no child content remains
 :)
declare function presort:exclude-copied($node as node()) as element(tei:rs)? {
    let $kept :=
        for $child in $node/node()
        let $starts-chain := exists($child/@next) and not(matches($child/@style, "underline"))
        return
            if ($starts-chain) then
                presort:apply-all-nexts($child)
            else if ($child/@prev) then
                ()
            else
                $child
    return
        if (empty($kept)) then
            ()
        else
            element {QName("http://www.tei-c.org/ns/1.0", $node/name())} {
                $node/@*,
                $kept
            }
};

(:~
 : Default handling of a node during sorting.
 :
 :   - Nodes whose @style matches "underline", and nodes with neither @prev
 :     nor @next, are rebuilt via presort:keep-node.
 :   - Nodes with @next but no @prev are expanded via presort:apply-all-nexts.
 :   - Nodes with @prev are dropped when their @prev or @next pointer resolves
 :     to a node, and rebuilt via presort:keep-node when neither resolves.
 :
 : @param $node the node to process
 : @return the resulting node(s), possibly empty
 :)
declare function presort:default-return($node as node()) as node()* {
    let $underlined := matches($node/@style, "underline")
    let $linked := exists($node/@prev) or exists($node/@next)
    return
        if ($underlined or not($linked)) then
            presort:keep-node($node)
        else if (empty($node/@prev)) then
            (: only @next is present :)
            presort:apply-all-nexts($node)
        else
            let $prev-node := presort:find-corresp-node($node, "prev")
            let $next-node := presort:find-corresp-node($node, "next")
            return
                if ($prev-node or $next-node) then
                    ()
                else
                    presort:keep-node($node)
};


(:~
 : Third pass: groups nodes that are connected by a
 : tei:metamark[@function = "integrate"].
 :
 : For a node with @xml:id, the metamarks below its outermost ancestor element
 : are checked:
 :   - if one of them names the node in @target, the node is dropped here;
 :   - otherwise, if one names it in @corresp, a tei:seg type="integration" is
 :     created that holds the element referenced by the metamark's @target,
 :     the metamark itself and the rebuilt node, in that order;
 :   - otherwise the node is rebuilt.
 : Nodes without @xml:id: "integrate" metamarks are dropped, text and comment
 : nodes are returned unchanged, everything else is rebuilt.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare function presort:sort-integrations($nodes as node()*) as node()* {
    for $node in $nodes
    return
        if ($node/@xml:id) then
            let $id := $node/@xml:id
            let $integrations := $node/ancestor::*[last()]//tei:metamark[@function = "integrate"]
            let $via-corresp := $integrations[substring-after(@corresp, "#") = $id]
            let $via-target := $integrations[substring-after(@target, "#") = $id]
            return
                if ($via-target) then
                    ()
                else if ($via-corresp) then
                    let $target-id := replace($via-corresp/@target, "#", "")
                    let $integrated := $via-corresp/ancestor::*[last()]//*[@xml:id = $target-id]
                    return
                        element {QName("http://www.tei-c.org/ns/1.0", "seg")} {
                            attribute type {"integration"},
                            $integrated,
                            $via-corresp,
                            presort:keep-node-integrations($node)
                        }
                else
                    presort:keep-node-integrations($node)
        else if ($node[self::tei:metamark[@function = "integrate"]]) then
            ()
        else if ($node instance of text() or $node instance of comment()) then
            $node
        else
            presort:keep-node-integrations($node)
};

(:~
 : Rebuilds a node as an element in the TEI namespace, keeping all of its
 : attributes (including @prev and @next) and passing its children through
 : presort:sort-integrations.
 :
 : @param $node the node to rebuild
 : @return the rebuilt element
 :)
declare function presort:keep-node-integrations($node as node()) as node()* {
    let $tei-name := QName("http://www.tei-c.org/ns/1.0", $node/name())
    return
        element {$tei-name} {
            $node/@*,
            presort:sort-integrations($node/node())
        }
};