teisimplehelpers.xqm 15.3 KB
Newer Older
1
2
xquery version "3.1";

3
4
5
6
(:~
 : This module contains a potpourri of functions that are needed to serialize
 : the TEI simplePrint version of the Fontane TEI encoding.
 :
 : @author Michelle Weidling
 : @version 1.0
 :)

11
module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers";
12
13
14
15

declare namespace tei="http://www.tei-c.org/ns/1.0";

import module namespace console="http://exist-db.org/xquery/console";
16
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
17
18
19
20
21
import module namespace functx = "http://www.functx.com";


(: TODO: adapt to general XML :)
(:~
 : Tells whether a hand is declared as contemporary.
 :
 : The leading "#" of the reference is stripped and the tei:handNote with the
 : resulting @xml:id is looked up in one fixed document (hard-coded database
 : path below). The hand counts as contemporary when that tei:handNote carries
 : @script = "contemporary". An empty $hand, an unknown id or a tei:handNote
 : without that @script value all yield false.
 :
 : TODO: check if we reach backside of calendar pages at all
 : @author Michelle Weidling
 : @param $hand the tei:milestone/@subtype or tei:mod/@hand to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:is-hand-contemporary($hand as xs:string?)
as xs:boolean {
    let $hand-id := replace($hand, "#", "")
    let $hand-notes := doc("/db/sade-projects/textgrid/data/xml/data/16b00.xml")//tei:handNote
    return
        (: a general comparison against an empty sequence is false, so no
           explicit if/else is needed :)
        $hand-notes[@xml:id = $hand-id]/@script = "contemporary"
};


43
44
45
46
47
48
(:~
 : Checks if a node is transposed, i.e. if it is encoded at another place than
 : it should be serialized. A node counts as transposed when a tei:ptr inside
 : the same tei:TEI points to the node's @xml:id.
 :
 : Matching rules:
 :  - a node without @xml:id is never transposed (previously the empty id
 :    matched every tei:ptr, since any string contains the empty string);
 :  - @target is treated as a whitespace separated list of pointers and a
 :    pointer matches only if it ends with "#" followed by the complete id, so
 :    the id "a1" no longer matches a pointer to "#a10".
 :
 : TODO (carried over): this function was marked as not working; verify the
 : result against the callers after the matching fixes above.
 :
 : @author Michelle Weidling
 : @param $node the current TEI node to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:is-transposed($node as node())
as xs:boolean {
    let $id := string($node/@xml:id)
    return
        if ($id = "") then
            false()
        else
            let $pointer := "#" || $id
            return
                some $target in $node/ancestor::tei:TEI//tei:ptr/@target
                satisfies (
                    some $token in tokenize(normalize-space($target), " ")
                    satisfies ends-with($token, $pointer)
                )
};

MRodz's avatar
MRodz committed
63

64
65
(:~
 : Performs a couple of processing steps on a text node:
 :
 : 1.   a hyphen ("-" or "⸗") at the very end of the text is removed unless
 :      simpleHelpers:keep-hyphen reports that it has to be kept (i.e. it is
 :      marked with <tei:lb break="keepHyphen"/>); a removed hyphen is flagged
 :      by appending "@P" to the text
 : 2.   every remaining "⸗" is normalized to "-"
 : 3.   long s (ſ) is normalized to "s"
 : 4.   the Tironian note followed by "c." is normalized to "etc."
 :
 : TODO: insert an additional whitespace if a line ends with a character. The
 : earlier sketch appended " " when the last character matched [\w\d,\.;?!]
 : and the text was the last child of its tei:line.
 :
 : Whitespace-only text nodes, and text nodes that consist of nothing but a
 : removed hyphen, produce the empty sequence.
 :
 : @author Michelle Weidling
 : @param $node the current text node
 : @return text() the formatted text, or the empty sequence
 :)
declare function simpleHelpers:prepare-text($node as text()) as text()? {
    if (normalize-space($node) = "") then
        ()
    else
        let $raw := string($node)
        let $drop-end-hyphen :=
            (ends-with($raw, "-") or ends-with($raw, "⸗"))
            and not(simpleHelpers:keep-hyphen($node))
        (: "@P" flags the removal of a hyphen. Both hyphen characters are a
           single character, so cutting off the last character is sufficient
           and, unlike a regex based helper, does not depend on the rest of
           the text. :)
        let $flagged :=
            if ($drop-end-hyphen) then
                substring($raw, 1, string-length($raw) - 1) || "@P"
            else
                $raw
        let $cleared-hyphen := replace($flagged, "⸗", "-")
        let $cleared-long-s := replace($cleared-hyphen, "ſ", "s")
        (: the dot is escaped so that only a literal "." after the Tironian
           abbreviation is consumed, not an arbitrary following character :)
        let $cleared-Tironian := replace($cleared-long-s, "&#x204a;c\.", "etc.")
        return
            (: a text that consisted only of a removed hyphen is now exactly
               the flag; no text node is returned for it because it is
               unnecessary and leads to problems while testing. The former
               length check (greater than 1) could never fail since the flag
               itself is two characters long. :)
            if ($cleared-Tironian = "@P") then
                ()
            else
                text { $cleared-Tironian }
};


(:~
 : Decides whether a hyphen has to survive in the edited text.
 :
 : This is the case when the element that directly follows the text node's
 : parent tei:line is a tei:lb with @break = "keepHyphen". A text node whose
 : parent is not a tei:line always yields false.
 :
 : @author Michelle Weidling
 : @param $node the current text node
 : @return xs:boolean
 :)
declare function simpleHelpers:keep-hyphen($node as text()) as xs:boolean {
    let $next-sibling := $node/parent::tei:line/following-sibling::*[1]
    return
        exists($next-sibling/self::tei:lb[@break = "keepHyphen"])
};


133
134
135
(:~
 : Checks if the current node has valid text, i.e. whether at least one of its
 : non-whitespace descendant text nodes is accepted by
 : simpleHelpers:is-valid-text. A node without such descendants yields false.
 :
 : @author Michelle Weidling
 : @param $node the node whose descendant text nodes are inspected
 : @return xs:boolean
 :)
declare function simpleHelpers:has-valid-text($node as node()) as xs:boolean {
    some $candidate in $node/descendant::text()[normalize-space(.) != ""]
    satisfies simpleHelpers:is-valid-text($candidate)
};


154
155
156
157
158
159
(:~
 : Checks if a text node has valid text. A non-whitespace text is valid if at
 : least one of the following holds:
 :
 :  - the hand of the nearest preceding tei:milestone[@unit = "handshift"]
 :    that has a @subtype is contemporary
 :  - the text is inside a tei:surface[@type = "label"] and that hand matches
 :    "Friedrich_Fontane" (text on calendars is still a TODO)
 :  - the text has an ancestor with @type = "edited_text" (notes of the modern
 :    editors)
 :  - the text is inside a tei:figDesc
 :
 : @author Michelle Weidling
 : @param $node a text node
 : @return xs:boolean
 :)
declare function simpleHelpers:is-valid-text($node as text()) as xs:boolean {
    let $current-hand := $node/preceding::tei:milestone[@unit = "handshift"][@subtype][1]/@subtype
    let $has-content := normalize-space($node) != ""
    let $is-fontane-on-label :=
        exists($node/ancestor::tei:surface[@type = "label"])
        and matches($current-hand, "Friedrich_Fontane")
    return
        $has-content
        and (
            simpleHelpers:is-hand-contemporary($current-hand)
            or $is-fontane-on-label
            or exists($node/ancestor::*[@type = "edited_text"])
            or exists($node/ancestor::tei:figDesc)
        )
};

178
179
180
181
(:~
 : Checks if the current tei:surface is a page. Pages are recognized by a
 : pagination (e.g. 4v) in the node's @n: one to seven characters out of
 : 0-9, I, V, X, M, C, D that are directly followed by "r" or "v".
 :
 : The pattern is not anchored, so it is enough for such a pagination to occur
 : anywhere inside @n. A tei:surface without @n is not a page.
 :
 : @author Michelle Weidling
 : @param $node element(tei:surface)
 : @return xs:boolean
 :)
declare function simpleHelpers:is-page($node as element(tei:surface))
as xs:boolean {
    let $pagination := $node/@n
    return
        matches($pagination, "[0-9IVXMCD]{1,7}[rv]{1}")
};


(:~
 : Retrieves the font size from the @style of a tei:seg.
 :
 : The tei:seg elements considered are the node itself and its descendants
 : that mention "font-size" in @style and are not inside a
 : tei:add[@place = "above"]. If several of them qualify, the first one in
 : document order wins; previously more than one match raised a type error
 : because substring-after only accepts a single value.
 :
 : Example for @style:
 : "font-size:large; letter-spacing:0.2cm; text-decoration:underline"
 : yields "large". Whitespace around the value is removed. If no font size is
 : found, the empty string is returned.
 :
 : @author Michelle Weidling
 : @param $node the current tei:line, tei:zone or tei:seg node with @type = heading
 : @return xs:string the font size value
 :)
declare function simpleHelpers:get-font-size($node as node()) as xs:string {
    let $style :=
        (
            $node/descendant-or-self::tei:seg
                [matches(@style, "font-size")]
                [not(ancestor::tei:add[@place = "above"])]
            /@style
        )[1]
    let $after-key := substring-after($style, "font-size:")
    let $value :=
        (: the font size is either terminated by ";" or the last property :)
        if (contains($after-key, ";")) then
            substring-before($after-key, ";")
        else
            $after-key
    return
        normalize-space($value)
};

MRodz's avatar
MRodz committed
209

210
211
212
(:~
 : Checks if a node's @style attribute contains any relevant information.
 : The @style is relevant as soon as it matches one of the keywords listed in
 : the function body. A node without @style yields false.
 :
 : @author Michelle Weidling
 : @param $node the current tei:zone, tei:line, or tei:seg node
 : @return xs:boolean
 :)
declare function simpleHelpers:has-valid-style($node as node())
as xs:boolean {
    let $relevant-keywords := (
        "font",
        "align",
        "spacing",
        "uppercase",
        "color:red",
        "black_letter",
        "roman",
        "line-through"
    )
    return
        some $keyword in $relevant-keywords
        satisfies matches($node/@style, $keyword)
};

MRodz's avatar
MRodz committed
234

235
236
237
238
239
240
241
242
243
(:~
 : Collects the renditions that are relevant for an element in the edited
 : text and merges them into one blank separated string that is meant for a
 : single @rendition.
 :
 : The information can be held either in a node's @style or in its
 : @rendition, so both are evaluated separately and combined afterwards:
 :
 :  - @style is split at blanks; only the parts that match one of the
 :    relevant keywords are kept, exactly as they are written
 :  - @rendition is split at blanks; a part matching "black_letter" or
 :    "roman" is reduced to that keyword, any other part loses everything up
 :    to and including its first "#"
 :
 : Although the return type is declared as a sequence, the result is always
 : one joined string (the empty string if nothing is relevant).
 : TODO: check if @rendition is the right attribute for that
 :
 : @author Michelle Weidling
 : @param $node the current tei:zone, tei:line, or tei:seg element
 : @return a string of all relevant information
 :)
declare function simpleHelpers:filter-rendition($node as node()) as xs:string* {
    let $relevant-keywords := (
        "font",
        "align",
        "spacing",
        "uppercase",
        "color:red",
        "black_letter",
        "roman",
        "line-through"
    )
    let $kept-styles :=
        tokenize($node/@style, " ")
            [some $keyword in $relevant-keywords satisfies matches(., $keyword)]
    let $renditions :=
        for $token in tokenize($node/@rendition, " ")
        return
            if (matches($token, "black_letter")) then
                "black_letter"
            else if (matches($token, "roman")) then
                "roman"
            else if (matches($token, "#")) then
                substring-after($token, "#")
            else
                $token
    return
        string-join(($kept-styles, $renditions), " ")
};

MRodz's avatar
MRodz committed
280

281
282
283
284
285
286
(:~
 : Checks if the current hand is valid. For the edited text only contemporary
 : hands/prints are considered, plus additions by Friedrich Fontane if they
 : occur on labels (and on the backside of calendar pages, see TODO).
 :
 : The hand is taken from the milestone's @subtype with every "#" removed. It
 : is valid if it is one of $hands, or if the milestone is inside a
 : tei:div[@type = "label"] and the hand matches "Friedrich_Fontane".
 :
 : TODO check if we reach calendar pages
 : @author Michelle Weidling
 : @param $hands a string sequence containing all contemporary hands that are
 :          declared in tei:handNotes
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-hand-valid($hands as xs:string*,
$node as element(tei:milestone)) as xs:boolean {
    let $current-hand := replace($node/@subtype, "#", "")
    (: general comparison: true if any item of $hands equals the hand :)
    let $is-known-hand := $current-hand = $hands
    let $is-fontane-on-label :=
        exists($node/ancestor::tei:div[@type = "label"])
        and matches($current-hand, "Friedrich_Fontane")
    return
        $is-known-hand or $is-fontane-on-label
};

306

307
(:~
 : Checks if the passed element(s) belong to a valid hand, i.e. whether the
 : nearest preceding tei:milestone[@unit = "handshift"] is accepted by
 : simpleHelpers:is-hand-valid.
 :
 : In some cases elements like tei:front or tei:body don't have a preceding
 : tei:milestone[@unit = "handshift"] because the initial pages are empty. To
 : preserve such elements a substitute milestone with subtype "#Fontane" is
 : checked instead.
 :
 : @param $hands a string sequence of hands that are regarded as valid
 : @param $node the element(s) to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:belongs-to-valid-hand($hands as xs:string*,
$node as element(*)*) as xs:boolean {
    let $nearest-handshift := $node/preceding::tei:milestone[@unit = "handshift"][1]
    let $effective-handshift :=
        if (exists($nearest-handshift)) then
            $nearest-handshift
        else
            <tei:milestone unit="handshift" subtype="#Fontane"/>
    return
        simpleHelpers:is-hand-valid($hands, $effective-handshift)
};


326
(:~
 : Finds the nearest tei:milestone[@unit = "handshift"] before $node whose
 : hand is valid according to simpleHelpers:is-hand-valid.
 :
 : Each preceding milestone is tested itself. The former predicate tested
 : $node instead of the candidate and therefore did not filter the candidates
 : at all. The validity filter is applied before the nearest match is picked,
 : so invalid milestones in between are skipped.
 :
 : @param $hands a string sequence of hands that are regarded as valid
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return the nearest preceding valid milestone, or the empty sequence if
 :          there is none
 :)
declare function simpleHelpers:find-prev-valid-hand($hands as xs:string*,
$node as element(tei:milestone)) as element(tei:milestone)? {
    (: preceding:: is a reverse axis, hence [1] selects the closest match :)
    $node/preceding::tei:milestone[@unit = "handshift"]
        [simpleHelpers:is-hand-valid($hands, .)][1]
};

331
(:~
 : Checks if the directly preceding milestone[@unit = "handshift"] is the
 : previous valid hand and describes the same hand as the current
 : milestone[@unit = "handshift"]. Two milestones describe the same hand if
 : their @subtype and @rend agree; an attribute that is missing on both sides
 : counts as agreeing.
 :
 : @author Michelle Weidling
 : @param $hands a string sequence of hands that are regarded as valid
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-valid-hand-same($hands as xs:string*,
$node as element(tei:milestone)) as xs:boolean {
    let $prev-hand := $node/preceding::tei:milestone[@unit = "handshift"][1]
    let $prev-valid-hand := simpleHelpers:find-prev-valid-hand($hands, $node)
    return
        exists($prev-hand)
        and exists($prev-valid-hand)
        (: node identity; comparing the milestones with "=" would compare
           the string values of two empty elements, which are always equal :)
        and $prev-hand is $prev-valid-hand
        (: since we can't take the order of the attributes for granted the
           attributes are compared one by one; string() turns a missing
           attribute into "" so that two missing attributes are equal :)
        and string($node/@subtype) = string($prev-hand/@subtype)
        and string($node/@rend) = string($prev-hand/@rend)
};

356
(:~
 : Checks if the previous milestone[@unit = "handshift"] is the same as the
 : current milestone[@unit = "handshift"]. They are the same if their
 : @subtype and @rend agree. An attribute that is missing on both milestones
 : counts as agreeing; previously such a pair was reported as different
 : because a comparison with a missing attribute is always false.
 :
 : If there is no previous milestone the result is false.
 :
 : @author Michelle Weidling
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-hand-same($node as element(tei:milestone))
as xs:boolean {
    let $prev-hand := $node/preceding::tei:milestone[@unit = "handshift"][1]
    return
        exists($prev-hand)
        (: since we can't take the order of the attributes for granted the
           attributes are compared one by one; string() turns a missing
           attribute into "" so that two missing attributes are equal :)
        and string($node/@subtype) = string($prev-hand/@subtype)
        and string($node/@rend) = string($prev-hand/@rend)
};

mrodzis's avatar
mrodzis committed
379

MRodz's avatar
MRodz committed
380
(: TODO :)
381
(:declare function simpleHelpers:make-section($node as element(tei:milestone))
MRodz's avatar
MRodz committed
382
383
384
385
386
387
as element(tei:section) {
    let $id := substring-after($node/@spanTo, "#")
    let $corresp := $node/following::*[$id = @xml:id]
    let $bla := console:log(util:get-fragment-between($node, $corresp, true(), true()))
    return
        <div/>
388
389
};:)

390
(:~
 : Returns the characters that surround a line break: the last character of
 : the last direct text node of the preceding tei:line, a new
 : tei:milestone with unit="line", and the first character of the first
 : direct text node of the following tei:line.
 :
 : If a neighbouring line or its text node is missing, the respective text
 : is built from the empty string.
 :
 : @param $node the current tei:lb element
 : @return a sequence of text, tei:milestone, text
 :)
declare function simpleHelpers:find-chars($node as element(tei:lb)) as node()* {
    let $text-before := $node/preceding::tei:line[1]/text()[last()]
    let $text-after := $node/following::tei:line[1]/text()[1]
    return (
        text { substring($text-before, string-length($text-before), 1) },
        element { QName("http://www.tei-c.org/ns/1.0", "milestone") } {
            attribute unit { "line" }
        },
        text { substring($text-after, 1, 1) }
    )
};

(:~
 : Returns the string value of a tei:line without its last character.
 :
 : @param $node the current tei:line element
 : @return text() the shortened text
 :)
declare function simpleHelpers:trim-last-char($node as element(tei:line))
as text() {
    let $content := string($node)
    return
        text { substring($content, 1, string-length($content) - 1) }
};

(:~
 : Returns the string value of a tei:line without its first character.
 :
 : @param $node the current tei:line element
 : @return text() the shortened text
 :)
declare function simpleHelpers:trim-first-char($node as element(tei:line))
as text() {
    (: without a length argument substring runs to the end of the string :)
    text { substring(string($node), 2) }
};

MRodz's avatar
MRodz committed
424
(:~
 : Creates a tei:milestone with unit="line". If the passed element has a
 : @rend, its value is copied to the milestone's @rendition.
 :
 : @param $node the element the milestone is created for
 : @return element(tei:milestone)
 :)
declare function simpleHelpers:start-line($node as element())
as element(tei:milestone) {
    element { QName("http://www.tei-c.org/ns/1.0", "milestone") } {
        attribute unit { "line" },
        (: yields no attribute at all when @rend is absent :)
        for $rend in $node/@rend
        return
            attribute rendition { $rend }
    }
};
434
435
436
437
438
439

(:~
 : Makes sure that a collection exists below the application root: if the
 : collection is not available yet it is created, otherwise nothing happens.
 :
 : NOTE(review): the availability check concatenates $config:app-root and
 : $dir-name without a separator; presumably one of them is expected to
 : provide the "/" - confirm against the callers.
 :
 : @param $dir-name the name of the collection
 : @return the result of xmldb:create-collection if the collection had to be
 :          created, otherwise the empty sequence
 :)
declare function simpleHelpers:assure-dir-available($dir-name as xs:string) {
    let $target := $config:app-root || $dir-name
    return
        if (not(xmldb:collection-available($target))) then
            xmldb:create-collection($config:app-root, $dir-name)
        else
            ()
};

(:~
 : Checks if a text node that begins with a whitespace needs trimming at its
 : start. Trimming is not necessary if the second character, i.e. the one
 : right after the leading whitespace, is an upper case letter A-Z or an
 : opening parenthesis.
 :
 : Only the second character is inspected. Previously two characters were
 : extracted, because the third argument of substring is a length and not an
 : end position, so an upper case third character suppressed the trimming as
 : well.
 :
 : @param $text a text node that begins with a whitespace
 : @return xs:boolean true if the text has to be trimmed
 :)
declare function simpleHelpers:is-trimming-necessary($text as text()) as xs:boolean {
    let $second-character := substring($text, 2, 1)
    return
        not(matches($second-character, "[A-Z(]"))
};