teisimplehelpers.xqm 13.3 KB
Newer Older
1
2
xquery version "3.1";

3
4
5
6
7
8
9
10
(:~
 : This module contains a potpourri of functions that are needed to serialize
 : the TEI simplePrint version of the Fontane TEI encoding.
 :
 : @author Michelle Rodzis
 : @version 1.0
 :)

11
module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers";
12
13
14
15
16
17
18
19
20

declare namespace tei="http://www.tei-c.org/ns/1.0";

import module namespace console="http://exist-db.org/xquery/console";
import module namespace functx = "http://www.functx.com";


(: TODO: adapt to general XML :)
(:~
 : Tells whether a hand is declared as contemporary.
 :
 : The leading "#" of the reference is stripped, then the tei:handNote with
 : that @xml:id is looked up in the document
 : /db/sade-projects/textgrid/data/xml/data/16b00.xml. The hand counts as
 : contemporary when that tei:handNote carries @script = "contemporary".
 : An empty or unknown reference yields false().
 :
 : TODO: check if we reach backside of calendar pages at all
 :
 : @author Michelle Rodzis
 : @param $hand the tei:handShift/@new or tei:mod/@hand to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:is-hand-contemporary($hand as xs:string?)
as xs:boolean {
    let $hand-id := replace($hand, "#", "")
    let $hand-notes := doc("/db/sade-projects/textgrid/data/xml/data/16b00.xml")//tei:handNote
    return
        (: a general comparison already yields xs:boolean, also for an empty left-hand side :)
        $hand-notes[@xml:id = $hand-id]/@script = "contemporary"
};


42
43
44
45
46
47
48
49
50
51
52
(:~
 : Checks if a node is transposed, i.e. if it is encoded at another place than
 : it should be serialized.
 :
 : A node counts as transposed when a tei:ptr inside the same tei:TEI has a
 : @target token whose fragment identifier (the part after "#") is exactly the
 : node's @xml:id. Nodes without an @xml:id (this includes text nodes) are never
 : transposed.
 :
 : TODO: this function was flagged as not working; verify the result against
 : real transposition encodings.
 :
 : @author Michelle Rodzis
 : @param $node the current TEI node to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:is-transposed($node as node())
as xs:boolean {
    let $id := string($node/@xml:id)
    return
        (: Without an ID nothing can point at the node. The guard matters:
           contains(@target, "") is true for every pointer. :)
        $id != ""
        and exists(
            $node/ancestor::tei:TEI//tei:ptr[
                (: @target may list several pointers; compare whole fragment
                   identifiers so that "a1" does not match "#a10" :)
                (for $pointer in tokenize(normalize-space(@target), " ")
                 return substring-after($pointer, "#")) = $id
            ]
        )
};

MRodz's avatar
MRodz committed
62

63
64
(:~
 : Performs a couple of processing steps on a text node:
 :
 : 1.   for the edited text only hyphens that are marked with
 :      <tei:lb break="keepHyphen"/> should be displayed; any other trailing
 :      "-" or "⸗" is removed
 : 2.   long s (ſ) is normalized to "s"
 : 3.   the Tironian note followed by "c." is normalized to "etc."
 :
 : TODO (step 4, not implemented): append a whitespace when the text ends with
 : a word character, digit or one of , . ; ? ! and is the last child of its
 : tei:line.
 :
 : @author Michelle Rodzis
 : @param $node the current text node
 : @return text() the formatted text, or the empty sequence for
 :         whitespace-only input
 :)
declare function simpleHelpers:prepare-text($node as text()) as text()? {
    if (normalize-space($node) = "") then
        ()
    else
        let $content := string($node)
        let $length := string-length($content)
        let $last-char := substring($content, $length, 1)
        let $without-hyphen :=
            (: Cut the final character directly. The regex-based
               functx:substring-before-last leaves the string untouched when a
               line break precedes the hyphen, because "." does not match
               newlines. :)
            if ($last-char = ("-", "⸗") and not(simpleHelpers:keep-hyphen($node))) then
                substring($content, 1, $length - 1)
            else
                $content
        let $without-long-s := replace($without-hyphen, "ſ", "s")
        (: the dot is escaped so that only a literal "." after the "c" matches :)
        let $without-tironian := replace($without-long-s, "&#x204a;c\.", "etc.")
        return text {$without-tironian}
};


(:~
 : Decides whether a trailing hyphen has to survive in the edited text.
 :
 : This is the case when the element that directly follows the text node's
 : parent tei:line is a tei:lb with @break = "keepHyphen".
 :
 : @author Michelle Rodzis
 : @param $node the current text node
 : @return xs:boolean
 :)
declare function simpleHelpers:keep-hyphen($node as text()) as xs:boolean {
    let $next-sibling := $node/parent::tei:line/following-sibling::*[1]
    return exists($next-sibling[self::tei:lb][@break = "keepHyphen"])
};


118
119
120
121
122
123
124
(:~
 : Checks if the current node has valid text, i.e. if at least one of its
 : non-whitespace descendant text nodes passes simpleHelpers:is-valid-text.
 :
 : @author Michelle Rodzis
 : @param $node the node whose descendant text nodes are inspected
 : @return xs:boolean
 :)
declare function simpleHelpers:has-valid-text($node as node()) as xs:boolean {
    some $candidate in $node/descendant::text()[normalize-space(.) != ""]
    satisfies simpleHelpers:is-valid-text($candidate)
};


139
140
141
142
143
144
145
146
147
148
(:~
 : Checks if a text node has valid text. A non-whitespace text is valid if at
 : least one of the following holds:
 :
 : - the nearest preceding tei:handShift with @new names a contemporary hand
 : - the text is inside a tei:surface with @type = "label" and that hand
 :   matches "Friedrich_Fontane"
 : - the text has an ancestor with @type = "edited_text" (notes of the modern
 :   editors)
 : - the text is inside a tei:figDesc
 :
 : TODO: text by Friedrich Fontane on calendar pages is not considered yet.
 :
 : @author Michelle Rodzis
 : @param $node a text node
 : @return xs:boolean
 :)
declare function simpleHelpers:is-valid-text($node as text()) as xs:boolean {
    let $active-hand := $node/preceding::tei:handShift[@new][1]/@new
    let $has-content := not(normalize-space($node) = "")
    let $is-friedrich-on-label :=
        exists($node/ancestor::tei:surface[@type = "label"])
        and matches($active-hand, "Friedrich_Fontane")
    let $is-editorial := exists($node/ancestor::*[@type = "edited_text"])
    let $is-figure-description := exists($node/ancestor::tei:figDesc)
    return
        $has-content
        and (
            simpleHelpers:is-hand-contemporary($active-hand)
            or $is-friedrich-on-label
            or $is-editorial
            or $is-figure-description
        )
};

163
164
165
166
167
168
169
170
171
172
(:~
 : Checks if the current tei:surface is a page. Pages are recognized by the
 : pagination in the surface's @n (e.g. 4v): one to seven digits or characters
 : out of I, V, X, M, C, D followed by "r" or "v". The pattern is not anchored,
 : so it may match anywhere inside @n. A surface without @n is not a page.
 :
 : @author Michelle Rodzis
 : @param $node the tei:surface to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:is-page($node as element(tei:surface))
as xs:boolean {
    let $pagination := $node/@n
    return matches($pagination, "[0-9IVXMCD]{1,7}[rv]{1}")
};


(:~
 : Retrieves the font size from the @style of a tei:seg.
 :
 : The first tei:seg (the node itself or a descendant, in document order) whose
 : @style mentions "font-size" is used. The value is everything between
 : "font-size:" and the next ";" or, if "font-size" is the last declaration, the
 : end of @style. Surrounding whitespace is removed.
 :
 : @author Michelle Rodzis
 : @param $node the current tei:line, tei:zone or tei:seg node with @type = heading
 : @return xs:string the font size value, or "" if no font size is declared
 :)
declare function simpleHelpers:get-font-size($node as node()) as xs:string {
    (: example for style: "font-size:large; letter-spacing:0.2cm; text-decoration:underline" :)
    (: Only the first match is used: several matching tei:seg elements would
       otherwise hand a sequence to substring-after and raise a type error. :)
    let $style := ($node/descendant-or-self::tei:seg[matches(@style, "font-size")])[1]/@style
    let $declaration := substring-after($style, "font-size:")
    let $value :=
        (: a trailing ";" is optional for the last declaration of a style :)
        if (contains($declaration, ";")) then
            substring-before($declaration, ";")
        else
            $declaration
    return normalize-space($value)
};

MRodz's avatar
MRodz committed
190

191
192
193
194
195
196
197
198
(:~
 : Checks if a node's @style attribute contains any relevant information, i.e.
 : if it mentions at least one of the keywords listed in the function body.
 : A node without @style has no valid style.
 :
 : @author Michelle Rodzis
 : @param $node the current tei:zone, tei:line, or tei:seg node
 : @return xs:boolean
 :)
declare function simpleHelpers:has-valid-style($node as node())
as xs:boolean {
    let $relevant-keywords := (
        "font",
        "align",
        "spacing",
        "uppercase",
        "color:red",
        "black_letter",
        "roman",
        "line-through"
    )
    return
        some $keyword in $relevant-keywords
        satisfies matches($node/@style, $keyword)
};

MRodz's avatar
MRodz committed
215

216
217
218
219
220
221
222
223
224
225
226
227
228
(:~
 : Collects the renditions that are relevant for an element in the edited text
 : and merges them into a single value for @rendition.
 :
 : Since this information can be held either in a node's @style or in its
 : @rendition, both are checked separately before being combined:
 :
 : - @style is split at whitespace; only tokens mentioning one of the relevant
 :   keywords (font, align, spacing, uppercase, color:red, black_letter, roman,
 :   line-through) are kept, unchanged
 : - @rendition is split at whitespace; a token mentioning "black_letter" or
 :   "roman" is reduced to that keyword, any other token loses everything up to
 :   and including its first "#"
 :
 : TODO: check if @rendition is the right attribute for that
 :
 : @author Michelle Rodzis
 : @param $node the current tei:zone, tei:line, or tei:seg element
 : @return one string holding all relevant information separated by single
 :         spaces ("" if there is none)
 :)
declare function simpleHelpers:filter-rendition($node as node()) as xs:string* {
    let $relevant-keywords :=
        "font|align|spacing|uppercase|color:red|black_letter|roman|line-through"
    (: normalize-space keeps repeated or surrounding whitespace from producing
       empty tokens, which would otherwise leak into the joined result :)
    let $relevant-styles :=
        tokenize(normalize-space($node/@style), " ")[matches(., $relevant-keywords)]
    let $transformed-renditions :=
        for $rend in tokenize(normalize-space($node/@rendition), " ")
        return
            if (matches($rend, "black_letter")) then
                "black_letter"
            else if (matches($rend, "roman")) then
                "roman"
            else if (contains($rend, "#")) then
                substring-after($rend, "#")
            else
                $rend
    return string-join(($relevant-styles, $transformed-renditions), " ")
};

MRodz's avatar
MRodz committed
261

262
263
264
265
266
267
268
(:~
 : Checks if the current hand is valid. For the edited text we only need to
 : consider contemporary hands/prints or additions by Friedrich Fontane if they
 : occur on labels and on the backside of calendar pages.
 :
 : A tei:handShift is valid when its @new (without "#") is one of $hands, or
 : when it sits inside a tei:div with @type = "label" and @new matches
 : "Friedrich_Fontane".
 :
 : TODO check if we reach calendar pages
 : NOTE(review): labels are looked up as tei:div here, whereas
 : simpleHelpers:is-valid-text tests tei:surface; confirm which one applies.
 :
 : @author Michelle Rodzis
 : @param $hands a string sequence containing all contemporary hands that are
 :          declared in tei:handNotes
 : @param $node the current tei:handShift element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-hand-valid($hands as xs:string*,
$node as element(tei:handShift)) as xs:boolean {
    let $hand-id := replace($node/@new, "#", "")
    (: general comparison: true as soon as one entry of $hands equals the ID :)
    let $is-declared-hand := $hand-id = $hands
    let $is-friedrich-on-label :=
        exists($node/ancestor::tei:div[@type = "label"])
        and matches($hand-id, "Friedrich_Fontane")
    return $is-declared-hand or $is-friedrich-on-label
};

287

288
(:~
 : Checks if an element was written by a valid hand, i.e. if the nearest
 : tei:handShift preceding it passes simpleHelpers:is-hand-valid.
 :
 : In some cases elements like tei:front or tei:body don't have a preceding
 : tei:handShift because the initial pages are empty. In these cases we want to
 : preserve the element and therefore fall back to a tei:handShift for
 : "#Fontane". The same fallback applies when $node is empty.
 :
 : When several elements are passed, the first one decides.
 :
 : @param $hands a string sequence containing all hands regarded as valid
 : @param $node the element(s) to be checked
 : @return xs:boolean
 :)
declare function simpleHelpers:belongs-to-valid-hand($hands as xs:string*,
$node as element(*)*) as xs:boolean {
    (: Restrict the lookup to one element. Several elements with different
       preceding handShifts would hand a sequence to is-hand-valid, which
       accepts exactly one tei:handShift. :)
    let $prev-hand := (
        $node[1]/preceding::tei:handShift[1],
        <tei:handShift new="#Fontane"/>
    )[1]
    return simpleHelpers:is-hand-valid($hands, $prev-hand)
};


306
307
308
309
310
(:~
 : Finds the nearest tei:handShift before the current one that is valid
 : according to simpleHelpers:is-hand-valid.
 :
 : @param $hands a string sequence containing all hands regarded as valid
 : @param $node the current tei:handShift element
 : @return the nearest preceding valid tei:handShift, or the empty sequence if
 :         there is none
 :)
declare function simpleHelpers:find-prev-valid-hand($hands as xs:string*,
$node as element(tei:handShift)) as element(tei:handShift)? {
    (: Validate each candidate (the context item), not $node itself, and filter
       before picking the nearest one. On the reverse preceding axis, [1] is
       the closest match. :)
    $node/preceding::tei:handShift[simpleHelpers:is-hand-valid($hands, .)][1]
};

311
(:~
 : Checks if the handShift directly before the current one is the previous
 : valid handShift and describes the same hand as the current one. Two
 : handShifts are the same if they agree in @new, @script and @medium; an
 : attribute that is missing on both sides counts as equal.
 :
 : @author Michelle Rodzis
 : @param $hands a string sequence containing all hands regarded as valid
 : @param $node the current tei:handShift element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-valid-hand-same($hands as xs:string*,
$node as element(tei:handShift)) as xs:boolean {
    let $prev-valid-hand := simpleHelpers:find-prev-valid-hand($hands, $node)
    let $prev-hand := $node/preceding::tei:handShift[1]
    return
        exists($prev-hand)
        and exists($prev-valid-hand)
        (: Node identity is required here. "=" would compare the string
           values, and those are always "" for the empty tei:handShift. :)
        and $prev-hand is $prev-valid-hand
        (: Since we can't take the order of the attributes for granted we
           compare them one by one. string() maps a missing attribute to "",
           because "=" on two empty sequences would be false. :)
        and string($node/@new) = string($prev-hand/@new)
        and string($node/@script) = string($prev-hand/@script)
        and string($node/@medium) = string($prev-hand/@medium)
};

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
(:~
 : Checks if the previous handShift is the same as the current handShift. Two
 : handShifts are the same if they agree in @new, @script and @medium; an
 : attribute that is missing on both sides counts as equal. Without a
 : preceding tei:handShift the result is false().
 :
 : @author Michelle Rodzis
 : @param $node the current tei:handShift element
 : @return xs:boolean
 :)
declare function simpleHelpers:is-prev-hand-same($node as element(tei:handShift))
as xs:boolean {
    let $prev-hand := $node/preceding::tei:handShift[1]
    return
        exists($prev-hand)
        (: Since we can't take the order of the attributes for granted we
           compare them one by one. string() maps a missing attribute to "",
           because "=" on two empty sequences would be false. :)
        and string($node/@new) = string($prev-hand/@new)
        and string($node/@script) = string($prev-hand/@script)
        and string($node/@medium) = string($prev-hand/@medium)
};

mrodzis's avatar
mrodzis committed
359

MRodz's avatar
MRodz committed
360
(: TODO :)
361
(:declare function simpleHelpers:make-section($node as element(tei:milestone))
MRodz's avatar
MRodz committed
362
363
364
365
366
367
as element(tei:section) {
    let $id := substring-after($node/@spanTo, "#")
    let $corresp := $node/following::*[$id = @xml:id]
    let $bla := console:log(util:get-fragment-between($node, $corresp, true(), true()))
    return
        <div/>
368
369
};:)

370
(:~
 : Returns the characters that surround a line break: the last character of
 : the last text child of the preceding tei:line, a tei:milestone with
 : @unit = "line", and the first character of the first text child of the
 : following tei:line.
 :
 : @param $node the current tei:lb element
 : @return a text node, a tei:milestone and another text node
 :)
declare function simpleHelpers:find-chars($node as element(tei:lb)) as node()* {
    let $text-before := $node/preceding::tei:line[1]/text()[last()]
    let $text-after := $node/following::tei:line[1]/text()[1]
    (: starting at the last position, a length of one selects the final character :)
    let $char-before := substring($text-before, string-length($text-before), 1)
    let $char-after := substring($text-after, 1, 1)
    return (
        text {$char-before},
        element tei:milestone {
            attribute unit {"line"}
        },
        text {$char-after}
    )
};

(:~
 : Returns the string value of a tei:line without its last character.
 :
 : @param $node the current tei:line element
 : @return text() the shortened text
 :)
declare function simpleHelpers:trim-last-char($node as element(tei:line))
as text() {
    let $content := string($node)
    return text { substring($content, 1, string-length($content) - 1) }
};

(:~
 : Returns the string value of a tei:line without its first character.
 :
 : @param $node the current tei:line element
 : @return text() the shortened text
 :)
declare function simpleHelpers:trim-first-char($node as element(tei:line))
as text() {
    (: everything from the second character up to the end :)
    text { substring(string($node), 2) }
};

MRodz's avatar
MRodz committed
404
(:~
 : Creates the tei:milestone that marks the beginning of a line. The milestone
 : carries @unit = "line" and, if present, a copy of the passed element's @rend.
 :
 : @param $node the element the line starts with
 : @return element(tei:milestone)
 :)
declare function simpleHelpers:start-line($node as element())
as element(tei:milestone) {
    element tei:milestone {
        attribute unit {"line"},
        (: an absent @rend is the empty sequence and adds nothing :)
        $node/@rend
    }
};