teisimplehelpers.xqm 15.5 KB
Newer Older
1
2
xquery version "3.1";

3
4
5
6
(:~
 : This module contains a potpourri of functions that are needed to serialize
 : the TEI simplePrint version of the Fontane TEI encoding.
 :
MRodz's avatar
MRodz committed
7
 : @author Michelle Weidling
8
9
10
 : @version 1.0
 :)

11
module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers";
12
13
14
15

declare namespace tei="http://www.tei-c.org/ns/1.0";

import module namespace console="http://exist-db.org/xquery/console";
16
import module namespace config="http://textgrid.de/ns/SADE/config" at "../../config/config.xqm";
17
18
19
20
21
import module namespace functx = "http://www.functx.com";


(: TODO: adapt to general XML :)
(:~
 : Tells whether a hand is registered as contemporary.
 :
 : Every "#" is removed from the passed hand reference. The result is looked
 : up by @xml:id among the tei:handNote elements of the document stored at
 : /db/sade-projects/textgrid/data/xml/data/16b00.xml. The hand counts as
 : contemporary if a matching tei:handNote has @script = "contemporary".
 :
 : TODO: check if we reach backside of calendar pages at all
 :
 : @author Michelle Weidling
 : @param $hand the tei:milestone/@subtype or tei:mod/@hand to be checked
 : @return true() if a matching contemporary tei:handNote exists, else false()
 :)
declare function simpleHelpers:is-hand-contemporary($hand as xs:string?)
as xs:boolean {
    let $hand-id := translate($hand, "#", "")
    let $hand-notes :=
        doc("/db/sade-projects/textgrid/data/xml/data/16b00.xml")//tei:handNote
    return
        exists($hand-notes[@xml:id = $hand-id][@script = "contemporary"])
};


43
44
45
46
47
48
(:~
 : Checks if a node is transposed, i.e. if it is encoded at another place than
 : it should be serialized.
 :
 : A node counts as transposed if some tei:ptr below the enclosing tei:TEI
 : lists the node's @xml:id in its @target. Each whitespace separated token of
 : @target is compared on its own; a token matches if it equals the ID or ends
 : with "#" followed by the ID.
 :
 : Nodes without an @xml:id are never transposed. The former implementation
 : used contains(@target, $node/@xml:id), which is true for every tei:ptr when
 : the ID is missing and which also confused IDs sharing a prefix, e.g. "a1"
 : and "a10".
 :
 : @author Michelle Weidling
 : @param $node the current TEI node to be checked
 : @return true() if a tei:ptr points at the node, else false()
 :)
declare function simpleHelpers:is-transposed($node as node())
as xs:boolean {
    let $id := string($node/@xml:id)
    return
        if ($id = "") then
            (: nothing can point at a node that has no ID :)
            false()
        else
            let $pointer-tokens :=
                for $target in $node/ancestor::tei:TEI//tei:ptr/@target
                return tokenize(normalize-space($target), " ")
            return
                some $token in $pointer-tokens
                satisfies ($token = $id or ends-with($token, "#" || $id))
};

MRodz's avatar
MRodz committed
63

64
65
(:~
 : Performs a couple of processing steps on a text node:
 :
 : 1.   for the edited text only hyphens that are marked with
 :      <tei:lb break="keepHyphen"/> should be displayed. A trailing "-" or
 :      "⸗" that is not to be kept is cut off and replaced by the marker "@P".
 :      Trailing hyphens of text directly inside tei:add are left untouched.
 : 2.   every remaining "⸗" is normalized to "-"
 : 3.   long s (ſ) is normalized to "s"
 : 4.   the Tironian note followed by "c." is normalized to "etc."
 : 5.   inserts an additional whitespace if a line ends with a character (TODO)
 :
 : The marker is needed because a text node sometimes consists of a hyphen
 : only; cutting the hyphen would leave an empty string, which leads to
 : problems while preparing the text any further.
 :
 : Changes compared to the former implementation:
 : - the "." of the Tironian pattern is escaped, so only a literal full stop
 :   is matched;
 : - the trailing hyphen is cut with substring() instead of
 :   functx:substring-before-last(), whose regular expression left the hyphen
 :   in place when the text contained a line break;
 : - a node that is reduced to the bare marker yields no text node. This was
 :   the commented intent, but the old test "string-length gt 1" could never
 :   fail because the marker itself is two characters long.
 :
 : @author Michelle Weidling
 : @param $node the current text node
 : @return the formatted text node, or the empty sequence for whitespace-only
 :         input and for input that consisted of a removable hyphen only
 :)
declare function simpleHelpers:prepare-text($node as text()) as text()? {
    if (normalize-space($node) = "") then
        ()
    else
        let $marker := "@P"
        let $raw := string($node)
        let $ends-with-hyphen := ends-with($raw, "-") or ends-with($raw, "⸗")
        let $without-end-hyphen :=
            if ($ends-with-hyphen and $node/parent::tei:add) then
                $raw
            else if ($ends-with-hyphen
                and not(simpleHelpers:keep-hyphen($node))) then
                (: both hyphen variants are exactly one character long :)
                substring($raw, 1, string-length($raw) - 1) || $marker
            else
                $raw
        let $cleared-hyphen := replace($without-end-hyphen, "⸗", "-")
        let $cleared-long-s := replace($cleared-hyphen, "ſ", "s")
        let $cleared-Tironian := replace($cleared-long-s, "&#x204a;c\.", "etc.")
        return
            (: in cases where a given $node only consisted of a hyphen we
            don't return a text node because it's unnecessary and leads to
            problems while testing. :)
            if ($cleared-Tironian = $marker) then
                ()
            else
                text { $cleared-Tironian }
};


(:~
 : Checks if a hyphen should be kept for the edited text or not.
 :
 : The hyphen is kept if the element directly following a tei:line ancestor
 : of the text node is a tei:lb with @break = "keepHyphen".
 :
 : @author Michelle Weidling
 : @param $node the current text node
 : @return true() if the hyphen has to be kept, else false()
 :)
declare function simpleHelpers:keep-hyphen($node as text()) as xs:boolean {
    let $after-line := $node/ancestor::tei:line/following-sibling::*[1]
    return
        exists($after-line/self::tei:lb[@break = "keepHyphen"])
};


136
137
138
(:~
 : Checks if the current node has valid text, i.e. if at least one of its
 : non-blank descendant text nodes passes simpleHelpers:is-valid-text.
 :
 : @author Michelle Weidling
 : @param $node the node whose descendant text nodes are inspected
 : @return true() if any descendant text node is valid, else false()
 :)
declare function simpleHelpers:has-valid-text($node as node()) as xs:boolean {
    some $candidate in $node/descendant::text()[normalize-space(.) != ""]
    satisfies simpleHelpers:is-valid-text($candidate)
};


157
158
159
160
161
162
(:~
 : Checks if a text node has valid text. A text is valid if it is written by a
 : contemporary hand or by Friedrich Fontane (in case it's text on a label or
 : on a calendar (TODO)). The same holds for notes of the modern editors which
 : are marked by @type = "edited_text" and for text inside tei:figDesc.
 : Whitespace-only text is never valid.
 :
 : The hand in force is taken from the nearest preceding
 : tei:milestone[@unit = "handshift"] that carries a @subtype.
 :
 : @author Michelle Weidling
 : @param $node a text node
 : @return xs:boolean
 :)
declare function simpleHelpers:is-valid-text($node as text()) as xs:boolean {
    let $hand :=
        $node/preceding::tei:milestone[@unit = "handshift"][@subtype][1]/@subtype
    let $accepted-hand :=
        simpleHelpers:is-hand-contemporary($hand)
        or (exists($node/ancestor::tei:surface[@type = "label"])
            and matches($hand, "Friedrich_Fontane"))
    let $editorial :=
        exists($node/ancestor::*[@type = "edited_text"])
        or exists($node/ancestor::tei:figDesc)
    return
        ($accepted-hand or $editorial)
        and normalize-space($node) != ""
};

181
182
183
184
(:~
 : Checks if the current tei:surface is a page. Pages can be recognized by
 : their pagination (e.g. 4v) in the node's @n: one to seven digits or Roman
 : numeral letters followed by "r" or "v". The pattern is not anchored, so it
 : may match anywhere inside @n.
 :
 : @author Michelle Weidling
 : @param $node the tei:surface to be checked
 : @return true() if @n contains a pagination, else false()
 :)
declare function simpleHelpers:is-page($node as element(tei:surface))
as xs:boolean {
    let $pagination := "[0-9IVXMCD]{1,7}[rv]{1}"
    let $label := $node/@n
    return
        matches($label, $pagination)
};


(:~
 : Retrieves the font size from the @style of a tei:seg.
 :
 : The tei:seg is searched in $node and its descendants. Segments inside a
 : tei:add[@place = "above"] are ignored. If several segments declare a font
 : size, the first one in document order wins; formerly this case raised a
 : type error because the whole sequence was handed to substring-after().
 :
 : Example for @style:
 : "font-size:large; letter-spacing:0.2cm; text-decoration:underline"
 : yields "large".
 :
 : @author Michelle Weidling
 : @param $node the current tei:line, tei:zone or tei:seg node with @type = heading
 : @return xs:string the font size value, or "" if no font size is declared
 :)
declare function simpleHelpers:get-font-size($node as node()) as xs:string {
    let $styles :=
        $node/descendant-or-self::tei:seg
            [matches(@style, "font-size")]
            [not(ancestor::tei:add[@place = "above"])]
            /@style
    (: everything after the property name, up to the next ";" if there is one :)
    let $declaration := substring-after($styles[1], "font-size:")
    return
        if (contains($declaration, ";")) then
            substring-before($declaration, ";")
        else
            $declaration
};

MRodz's avatar
MRodz committed
212

213
214
215
(:~
 : Checks if a node's @style attribute contains any relevant information.
 : Relevant is every @style that matches at least one of the keywords listed
 : in the function body.
 :
 : @author Michelle Weidling
 : @param $node the current tei:zone, tei:line, or tei:seg node
 : @return true() if @style matches a relevant keyword, else false()
 :)
declare function simpleHelpers:has-valid-style($node as node())
as xs:boolean {
    let $keywords := (
        "font",
        "align",
        "spacing",
        "uppercase",
        "color:red",
        "black_letter",
        "roman",
        "line-through"
    )
    return
        some $keyword in $keywords
        satisfies matches($node/@style, $keyword)
};

MRodz's avatar
MRodz committed
237

238
239
240
241
242
243
244
245
246
(:~
 : Collects the renditions that are relevant for an element in the edited
 : text and merges them into a single string suitable for one @rendition.
 :
 : Since this information can be held either in a node's @style or in its
 : @rendition, we have to check both of them separately before combining them
 : for convenience reasons.
 :
 : - @style is split at whitespace; only tokens matching one of the relevant
 :   keywords are kept, unchanged.
 : - @rendition is split at whitespace; tokens containing "black_letter" or
 :   "roman" are reduced to that keyword, all other tokens are kept with
 :   everything up to and including the first "#" removed.
 :
 : Both attributes are whitespace-normalized before they are split, so
 : repeated blanks no longer produce empty tokens that ended up as additional
 : blanks in the result.
 :
 : TODO: check if @rendition is the right attribute for that
 :
 : @author Michelle Weidling
 : @param $node the current tei:zone, tei:line, or tei:seg element
 : @return one string with all relevant values separated by single blanks;
 :         the empty string if there are none
 :)
declare function simpleHelpers:filter-rendition($node as node()) as xs:string* {
    let $keywords := (
        "font",
        "align",
        "spacing",
        "uppercase",
        "color:red",
        "black_letter",
        "roman",
        "line-through"
    )
    let $relevant-styles :=
        for $style in tokenize(normalize-space($node/@style), " ")
        where some $keyword in $keywords satisfies matches($style, $keyword)
        return $style
    let $transformed-renditions :=
        for $rend in tokenize(normalize-space($node/@rendition), " ")
        return
            if (contains($rend, "black_letter")) then
                "black_letter"
            else if (contains($rend, "roman")) then
                "roman"
            else if (contains($rend, "#")) then
                substring-after($rend, "#")
            else
                $rend
    return
        string-join(($relevant-styles, $transformed-renditions), " ")
};

MRodz's avatar
MRodz committed
283

284
285
286
287
288
289
(:~
 : Checks if the current hand is valid. For the edited text we only need to
 : consider contemporary hands/prints or additions by Friedrich Fontane if they
 : occur on labels and on the backside of calendar pages.
 :
 : The hand is read from the milestone's @subtype with every "#" removed. It
 : is valid if it is listed in $hands, or if it matches "Friedrich_Fontane"
 : and the milestone has an ancestor with @type "label" or "toc".
 :
 : TODO check if we reach calendar pages
 :
 : @author Michelle Weidling
 : @param $hands a string sequence containing all contemporary hands that are
 :          declared in tei:handNotes
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-hand-valid($hands as xs:string*,
$node as element(tei:milestone)) as xs:boolean {
    let $hand-id := replace($node/@subtype, "#", "")
    let $is-listed := $hand-id = $hands
    let $is-fontane-on-label-or-toc :=
        exists($node/ancestor::*[@type = ("label", "toc")])
        and matches($hand-id, "Friedrich_Fontane")
    return
        $is-listed or $is-fontane-on-label-or-toc
};

309

310
(:~
 : Checks if an element lies in the scope of a valid hand, i.e. if the nearest
 : preceding tei:milestone[@unit = "handshift"] passes
 : simpleHelpers:is-hand-valid.
 :
 : In some cases elements like tei:front or tei:body don't have a preceding
 : tei:milestone[@unit = "handshift"] because the initial pages are empty.
 : In these cases we want to preserve the element and therefore fall back to
 : a tei:milestone[@unit = "handshift"] with @subtype = "#Fontane". The same
 : fallback is used if $node is empty.
 :
 : If more than one element is passed, only the first item of the sequence is
 : considered. Formerly this case raised a type error because one milestone
 : per element was handed to simpleHelpers:is-hand-valid.
 : NOTE(review): confirm that the first element is the intended choice.
 :
 : @param $hands a string sequence containing all contemporary hands
 : @param $node the element(s) to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:belongs-to-valid-hand($hands as xs:string*,
$node as element(*)*) as xs:boolean {
    let $preceding-handshift :=
        $node[1]/preceding::tei:milestone[@unit = "handshift"][1]
    let $effective-handshift :=
        if (exists($preceding-handshift)) then
            $preceding-handshift
        else
            <tei:milestone unit="handshift" subtype="#Fontane"/>
    return
        simpleHelpers:is-hand-valid($hands, $effective-handshift)
};


329
(:~
 : Returns the tei:milestone[@unit = "handshift"] directly preceding $node,
 : provided that $node itself passes simpleHelpers:is-hand-valid. In every
 : other case the empty sequence is returned.
 :
 : The return type is optional now. With the former mandatory return type a
 : type error was raised whenever there was no preceding handshift or $node
 : was not valid.
 :
 : NOTE(review): validity is tested on $node, not on the preceding milestone.
 : This is kept as it was; the function name suggests that the nearest
 : preceding milestone that is itself valid may have been intended. Confirm
 : against the callers before changing it.
 :
 : @param $hands a string sequence containing all contemporary hands
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return the directly preceding handshift milestone or the empty sequence
 :)
declare function simpleHelpers:find-prev-valid-hand($hands as xs:string*,
$node as element(tei:milestone)) as element(tei:milestone)? {
    if (simpleHelpers:is-hand-valid($hands, $node)) then
        $node/preceding::tei:milestone[@unit = "handshift"][1]
    else
        ()
};

334
(:~
 : Checks if the current milestone[@unit = "handshift"] is valid and repeats
 : the directly preceding milestone[@unit = "handshift"]. Two milestones are
 : considered the same if their @subtype and their @rend are equal.
 :
 : Changes compared to the former implementation:
 : - $hands accepts a sequence of hands like all other functions of this
 :   module instead of exactly one string;
 : - the validity test is done directly via simpleHelpers:is-hand-valid. The
 :   former detour compared the string values of two empty elements and
 :   raised a type error if no valid previous hand could be determined.
 :   For all inputs that did not fail before the result is unchanged.
 :
 : NOTE(review): if both milestones lack @rend (or @subtype) the comparison
 : of the two empty sequences is false, i.e. they do not count as the same.
 : Confirm that this is intended.
 :
 : @author Michelle Weidling
 : @param $hands a string sequence containing all contemporary hands
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-valid-hand-same($hands as xs:string*,
$node as element(tei:milestone)) as xs:boolean {
    let $prev-hand := $node/preceding::tei:milestone[@unit = "handshift"][1]
    return
        (: since we can't take the order of the attributes for granted we can't
        use functx:sequence-deep-equal :)
        exists($prev-hand)
        and simpleHelpers:is-hand-valid($hands, $node)
        and $node/@subtype = $prev-hand/@subtype
        and $node/@rend = $prev-hand/@rend
};

359
(:~
 : Checks if the previous milestone[@unit = "handshift"] is the same as the
 : current milestone[@unit = "handshift"]. They are the same if a previous
 : handshift exists and both @subtype and @rend are equal. A milestone pair
 : in which @subtype or @rend is missing on both sides does not count as the
 : same, because comparing two empty sequences is false.
 :
 : @author Michelle Weidling
 : @param $node the current tei:milestone[@unit = "handshift"] element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-hand-same($node as element(tei:milestone))
as xs:boolean {
    let $previous := $node/preceding::tei:milestone[@unit = "handshift"][1]
    return
        (: the attributes are compared one by one because their order can't
        be taken for granted, which rules out functx:sequence-deep-equal :)
        exists($previous)
        and $previous/@subtype = $node/@subtype
        and $previous/@rend = $node/@rend
};

mrodzis's avatar
mrodzis committed
382

MRodz's avatar
MRodz committed
383
(: TODO :)
384
(:declare function simpleHelpers:make-section($node as element(tei:milestone))
MRodz's avatar
MRodz committed
385
386
387
388
389
390
as element(tei:section) {
    let $id := substring-after($node/@spanTo, "#")
    let $corresp := $node/following::*[$id = @xml:id]
    let $bla := console:log(util:get-fragment-between($node, $corresp, true(), true()))
    return
        <div/>
391
392
};:)

393
(:~
 : Collects the characters that surround a line break.
 :
 : @param $node the current tei:lb
 : @return a sequence of three nodes: a text node with the last character of
 :         the last text child of the preceding tei:line, a new
 :         tei:milestone with @unit = "line", and a text node with the first
 :         character of the first text child of the following tei:line
 :)
declare function simpleHelpers:find-chars($node as element(tei:lb)) as node()* {
    let $before := $node/preceding::tei:line[1]/text()[last()]
    let $after := $node/following::tei:line[1]/text()[1]
    (: starting at the last position leaves exactly one character :)
    let $last-char-before := substring($before, string-length($before))
    let $first-char-after := substring($after, 1, 1)
    return (
        text { $last-char-before },
        element { QName("http://www.tei-c.org/ns/1.0", "milestone") } {
            attribute unit { "line" }
        },
        text { $first-char-after }
    )
};

(:~
 : Returns the string value of a tei:line without its last character.
 :
 : @param $node the current tei:line
 : @return a new text node holding the shortened string value
 :)
declare function simpleHelpers:trim-last-char($node as element(tei:line))
as text() {
    let $content := string($node)
    let $kept := string-length($content) - 1
    return
        text { substring($content, 1, $kept) }
};

(:~
 : Returns the string value of a tei:line without its first character.
 :
 : @param $node the current tei:line
 : @return a new text node holding the shortened string value
 :)
declare function simpleHelpers:trim-first-char($node as element(tei:line))
as text() {
    let $content := string($node)
    return
        (: everything from the second character up to the end :)
        text { substring($content, 2) }
};

MRodz's avatar
MRodz committed
427
(:~
 : Creates a tei:milestone with @unit = "line" that marks the start of a line.
 : If the passed element has a @rend, its value is copied to the milestone's
 : @rendition.
 :
 : @param $node the element the milestone is created for
 : @return the new tei:milestone
 :)
declare function simpleHelpers:start-line($node as element())
as element(tei:milestone) {
    let $name := QName("http://www.tei-c.org/ns/1.0", "milestone")
    let $rendition :=
        for $rend in $node/@rend
        return attribute rendition { $rend }
    return
        element { $name } {
            attribute unit { "line" },
            $rendition
        }
};
437
438
439
440
441
442

(:~
 : Makes sure that a collection exists below $config:app-root and creates it
 : if it is missing.
 :
 : The path used for the existence check is built with exactly one "/"
 : between $config:app-root and $dir-name. Formerly both parts were simply
 : concatenated, so for a $dir-name without a leading slash the check looked
 : at a different path than the one that was created. The call that creates
 : the collection is unchanged.
 :
 : @param $dir-name the name of the collection, with or without leading "/"
 : @return the empty sequence if the collection is already available,
 :         otherwise the result of xmldb:create-collection
 :)
declare function simpleHelpers:assure-dir-available($dir-name as xs:string) {
    let $root := replace($config:app-root, "/+$", "")
    let $relative := replace($dir-name, "^/+", "")
    let $collection := $root || "/" || $relative
    return
        if (xmldb:collection-available($collection)) then
            ()
        else
            xmldb:create-collection($config:app-root, $dir-name)
};

(:~
 : Checks if a text node that begins with a whitespace needs trimming at its
 : start. Trimming is not necessary if the second character of the text is
 : an upper case letter from A to Z or an opening parenthesis.
 :
 : Formerly substring($text, 2, 2) was inspected, which holds the second AND
 : the third character, so an upper case letter at the third position was
 : enough to suppress trimming.
 :
 : @param $text the text node to be checked
 : @return false() if the second character is in [A-Z(], else true()
 :)
declare function simpleHelpers:is-trimming-necessary($text as text()) as xs:boolean {
    let $second-character := substring($text, 2, 1)
    return
        not(matches($second-character, "[A-Z(]"))
};