tei2teisimple.xqm 19.2 KB
Newer Older
MRodz's avatar
MRodz committed
1
2
xquery version "3.1";

3
(:~
Michelle Rodzis's avatar
Michelle Rodzis committed
4
5
 : This module handles the conversion of the Fontane-TEI/XML into TEI simplePrint
 : for the edited text. The resulting TEI simplePrint is the basis for the "Edierter
6
 : Text" (edited text) view on the website and the book. It represents the latest
Michelle Rodzis's avatar
Michelle Rodzis committed
7
 : layer of text.
8
 :
MRodz's avatar
MRodz committed
9
10
 : @author Michelle Rodzis
 : @version 0.1
Michelle Rodzis's avatar
Michelle Rodzis committed
11
 : @since TODO
MRodz's avatar
MRodz committed
12
13
 :)

14
module namespace fontaneSimple="http://fontane-nb.dariah.eu/teisimple";
MRodz's avatar
MRodz committed
15

MRodz's avatar
MRodz committed
16

MRodz's avatar
MRodz committed
17
18
19
20
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";

import module namespace console="http://exist-db.org/xquery/console";
21
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
MRodz's avatar
MRodz committed
22

23
(:~
 : The main function initiates the transformation of a given notebook.
 :
 : The notebook is loaded from the TextGrid data collection, its surfaces are
 : split into front covers, back covers and regular content, and each group is
 : passed to fontaneSimple:transform. The result is wrapped in a tei:TEI element
 : (together with the original tei:teiHeader) and stored as
 : "tei-simple-tmp.xml".
 :
 : If the notebook cannot be opened, a message is written to the console and
 : nothing is stored, so that a previously generated result is not overwritten
 : by an empty TEI skeleton.
 :
 : TODO: adapt to several input files?
 :
 : @param $file the resource name of the notebook inside
 :        /db/sade-projects/textgrid/data/xml/data/
 : @return the value returned by xmldb:store for the stored resource, or the
 :         empty sequence if the notebook could not be opened
 :)
declare function fontaneSimple:main($file as xs:string) as xs:string? {
    let $doc :=
        try {
            doc("/db/sade-projects/textgrid/data/xml/data/" || $file)
        } catch * {
            (: treated like a missing document below :)
            ()
        }
    return
        if (empty($doc)) then
            (: NOTE(review): console:log is assumed to return the empty
               sequence; the binding only forces its evaluation. :)
            let $logged := console:log("It was not possible to open the requested file " || $file)
            return
                ()
        else
            let $front-covers := $doc//tei:sourceDoc/tei:surface[contains(@n, "front_cover")]
            let $back-covers := $doc//tei:sourceDoc/tei:surface[contains(@n, "back_cover")]
            (: everything that is neither a cover nor the spine :)
            let $content := $doc//tei:sourceDoc/tei:surface[not(contains(@n, "cover")
                or matches(@n, "spine"))]

            let $tei :=
                <TEI xmlns="http://www.tei-c.org/ns/1.0">
                    {$doc//tei:teiHeader}
                    <text>
                        <front>{fontaneSimple:transform($front-covers)}</front>
                        <body>{fontaneSimple:transform($content)}</body>
                        <back>{fontaneSimple:transform($back-covers)}</back>
                    </text>
                </TEI>
            return
                xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple-tmp.xml", $tei)
};

53
(:~
 : Recursively iterates the passed nodes and converts them according to the
 : requirements for the "Edierter Text". While it covers almost all of the
 : requirements stated in the encoding documentation (cf.
 : https://fontane-nb.dariah.eu/doku.html), some parts of it are handled in a
 : second step (cf. TODO) - especially the removal of tei:handShift duplicates
 : and the tei:milestone expansion to tei:div[@type = "section"] resp. tei:p -
 : because it is easier to perform these steps after the XML hierarchy has been
 : flattened a bit.
 :
 : Each node is dispatched by a typeswitch on its kind/element name. A branch
 : either drops the node (empty sequence), copies it via a simpleHelpers
 : function, wraps its transformed children in a new element, or simply
 : continues with the children. Elements without a dedicated branch are
 : unwrapped, i.e. only their transformed children are returned.
 :
 : @author Michelle Rodzis
 : @param $nodes the elements of the book covers and the book content
 : @return the converted nodes; may be the empty sequence
 :)
declare function fontaneSimple:transform($nodes as node()*) as node()* {
    for $node in $nodes
      return
        typeswitch ($node)
        (: text is only kept inside lines, figure descriptions, edited-text
           descriptions and editorial notes :)
        case text() return
            if($node/ancestor::tei:line
            or $node/ancestor::tei:figDesc
            or $node/ancestor::tei:desc[@type = "edited_text"]
            or $node/ancestor::tei:note[@type = "editorial"]) then
                simpleHelpers:prepare-text($node)
            else
                ()

        case element(tei:lb) return
            if($node[@break = "keepHyphen"]) then
                ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:g) return
            if($node[@ref = "#vds"]) then
                ()
            else if($node/@ref ="#rth" or $node/@ref ="#hb") then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        (: deletions are dropped unless they have been restored :)
        case element(tei:del) return
            if($node/parent::tei:restore) then
                fontaneSimple:transform($node/node())
            else if($node/descendant::tei:restore) then
                fontaneSimple:transform($node/descendant::tei:restore)
            else
                ()

        case element(tei:restore) return
            if(count($node/child::*) = 1 and $node/child::tei:del
            and $node/ancestor::tei:del) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:retrace) return
            if($node/@rend) then
                element tei:seg {
                    attribute rendition {$node/@rend},
                    fontaneSimple:transform($node/node())
                }
            else
                fontaneSimple:transform($node/node())

        case element(tei:add) return
            if($node/@type = "edited_text"
            or $node/child::tei:seg[@type = "multiphrase"]) then
                simpleHelpers:copy-element($node)
            else if($node/@cause ="catchword" or $node/@cause ="unclear") then
                ()
            else if($node/@rend ="|") then
                fontaneSimple:transform($node/node())
            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())
            else if(simpleHelpers:is-transposed($node)) then
                (: There is no context item inside a function body, so the
                   search has to be anchored at the document of the current
                   node. The id is compared literally instead of being
                   interpreted as a regular expression. :)
                let $corresp := root($node)//tei:metamark[contains(@target, $node/@xml:id)]
                return
                    (fontaneSimple:transform($corresp/node()),
                    fontaneSimple:transform($node/node()))
            else
                fontaneSimple:transform($node/node())

        case element(tei:addSpan) return
            if($node/@type = "edited_text") then
                simpleHelpers:copy-element($node)
            else
                ()

        case element(tei:fw) return
            ()

        case element(tei:line) return
            (: a line that consists of a single tei:fw element is dropped :)
            if(count($node/*) = 1 and $node/child::tei:fw) then
                ()
            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if(simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman")) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@type = "item")) then
                fontaneSimple:transform($node/node())
            (: from here on the line is known to be of type "item" :)
            else if($node/@type = "item" and not($node/@xml:id)) then
                element tei:item {
                    fontaneSimple:transform($node/node())
                }
            else if($node/@type = "item"
            and simpleHelpers:is-transposed($node)) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:handShift) return
            simpleHelpers:enhance-handshift($node)

        case element(tei:stamp) return
            ()

        case element(tei:seg) return
            (: segments that only wrap a stamp or a caret are dropped :)
            if(count($node/*) = 1 and
            ($node/child::tei:stamp or $node/child::tei:metamark[@function = "caret"]))
                then
                    ()

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if(matches($node/@style, "underline")
            and not(matches($node/@style, "vertical-align"))) then
                fontaneSimple:transform($node/node())

            else if(simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman")) then
                simpleHelpers:make-seg-with-rendition($node)

            else if($node/@type = "initials"
            or $node/@type = "monogram"
            or $node/@type = "multiphrase"
            or $node/@xml:lang)
                then
                    simpleHelpers:copy-element($node)

            else if($node/@type = "auction_number"
            or $node/@type = "cancel"
            or $node/@type = "abort"
            or $node/@function ="unknown")
                then
                    ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:hi) return
            simpleHelpers:copy-element($node)

        (: TODO if $node/@type = "highlighted" then make
        a hi[@type = "vertical-mark"] in the second stage of creating the
        simple format. use simpleHelpers:get-xml-chunk($node) for this.:)
        case element(tei:mod) return
            if($node/@type = "highlighted"
            and simpleHelpers:is-hand-contemporary($node/@hand)) then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        case element(tei:anchor) return
            simpleHelpers:copy-element($node)

        case element(tei:surface) return
            if(matches($node/@n, "cover")) then
                simpleHelpers:make-pb-with-type($node/@n)
            else if(simpleHelpers:is-page($node)
            and $node/@type = "clipping") then
                (: clippings always get a page break; their content is skipped
                   for the subtypes listed below :)
                (simpleHelpers:make-pb($node),
                (if(not($node/@subtype = "Kalenderblatt"
                or $node/@subtype = "Zeitungsausschnitt_Fragment")) then
                    fontaneSimple:transform($node/node())
                else
                    ()))
            else if(simpleHelpers:is-page($node)) then
                (simpleHelpers:make-pb($node),
                fontaneSimple:transform($node/node()))
            else if($node/@type = "label" and
            (contains($node/@subtype, "Fontane")
            or contains($node/@subtype, "Hersteller"))
            ) then
                simpleHelpers:make-div($node)
            else
                ()

        case element(tei:milestone) return
            if($node/@unit = "illustration") then
                ()
(:            else if($node/@unit = "section") then:)
(:                simpleHelpers:make-section($node):)
(:            else if($node/@unit = "paragraph") then:)
(:                fontaneSimple:make-paragraph($node):)
            else
                simpleHelpers:copy-element($node)

        case element(tei:gap) return
            simpleHelpers:copy-element($node)

        case element(tei:metamark) return
            if($node/@function = "integrate"
            or $node/@function = "authorial_note") then
                (: attributes are kept, content is not :)
                element tei:ab {
                    $node/@*
                }
            else if($node/@function = "placeholder"
            or $node/@function ="etc."
            or $node/@function ="caret"
            or $node/@function ="footnotes"
            or $node/@function ="ellipsis"
            or $node/@function = "paragraph") then
                element tei:ab {
                    attribute type {$node/@function},
                    fontaneSimple:transform($node/node())
                }
            else
                ()

        (: TODO: check if correct :)
        case element(tei:surplus) return
            ()

        case element(tei:zone) return
            if(matches($node/@style, "border-style:solid")
            and not(matches($node/@style, "border-radius"))
            and not($node/@rend = "border-style:house")) then
                element tei:div {
                    attribute type {"frame"},
                    fontaneSimple:transform($node/node())
                }

            else if(matches($node/@rend, "border-bottom-style:brace")) then
                (fontaneSimple:transform($node/node()),
                element tei:ab {
                    attribute type {"bottom-brace"}
                })

            else if($node/@type = "cancel") then
                ()

            else if($node/@type = "marked_off") then
                element tei:seg {
                    $node/@type,
                    $node/@xml:id,
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "highlighted") then
                (: only the innermost highlighted zone produces a tei:hi :)
                if($node/child::tei:zone[@type = "highlighted"]) then
                    fontaneSimple:transform($node/node())
                else
                    element tei:hi {
                        attribute type {"vertical-mark"},
                        fontaneSimple:transform($node/node())
                    }

            else if($node/@type = "illustration"
            or $node/@type = "printed_illustration") then
                (: illustrations containing a deleted figure are dropped :)
                if(not($node//tei:figure/parent::tei:del)) then
                    element {QName("http://www.tei-c.org/ns/1.0", "ab")}{
                        (if($node/child::tei:zone[@type = "illustration"]) then
                            attribute type {"composed-sketch"}
                        else
                            (attribute type {"sketch"},
                            if($node/parent::tei:zone[@type = "illustration"]) then
                                attribute rendition {"margin-left:" || $node/@ulx
                                || "cm; " || "margin-top:" || $node/@uly || "cm"}
                            else
                                ()
                            )
                        ),
                        fontaneSimple:transform($node/node())
                    }
                else
                    ()

            (: zones directly inside an illustration become captions :)
            else if($node/parent::tei:zone/@type = "illustration"
            or $node/parent::tei:zone/@type = "printed_illustration") then
                element {QName("http://www.tei-c.org/ns/1.0", "seg")}{
                    attribute type {"caption"},
                    attribute rendition {"margin-left:" || $node/@ulx || "cm; "
                        || "margin-top:" || $node/@uly || "cm"},
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if($node/@type = "list" or $node/@type = "item") then
                (: the value of @type is used as the element name :)
                element {QName("http://www.tei-c.org/ns/1.0", $node/@type)}{
                    $node/(@* except (@type, @ulx, @uly, @lrx, @lry, @rotate)),
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "legend") then
                element {QName("http://www.tei-c.org/ns/1.0", "div")}{
                    (if($node/@style
                    or $node/@rendition) then
                        attribute rendition {simpleHelpers:filter-rendition($node)}
                    else
                        ()),
                    $node/(@* except (@rendition, @style)),
                    fontaneSimple:transform($node/node())
                }

            else if(simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman")) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())

            else if($node/@xml:id and simpleHelpers:is-transposed($node)) then
                ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:figure) return
            if(count($node/child::*) = 1 and $node/child::tei:figDesc) then
                (: genealogy lines probably shouldn't be displayed, but I still have to
                check that. in case they should be serialized, I leave the code :)
(:                if(matches($node/descendant::tei:ref, "Stammbaumverbindungslinie")) then:)
(:                    element tei:seg {:)
(:                        $node/@*,:)
(:                        fontaneSimple:transform($node/node()):)
(:                    }:)
(:                else if(matches($node/descendant::tei:ref, "Schlusslinie")):)
                if(matches($node/descendant::tei:ref, "Schlusslinie"))
                    then
                        element tei:ab {
                            switch ($node/descendant::tei:ref)
                                case "horizontale einfache Schlusslinie" return
                                    attribute type {"long-end-line"}
                                case "Schlusslinie; horizontale Halbschleife von links oben nach rechts" return
                                    attribute type {"long-end-line"}
                                case "horizontale einfache Schlusslinie (gewellt)" return
                                    attribute type {"long-end-line-wavy"}
                                case "Schlusslinien; horizontale Schleife von links oben nach rechts unten" return
                                    attribute type {"bottom-brace-short"}
                                default return
                                    attribute type {"end-line"}
                        }
                else if(matches($node/descendant::tei:ref, "Absatzlinie")
                (: in case of double paragraph lines the single lines are
                encoded with "oberer" resp. "unterer Teil", but we only
                serialize the encoding for the upper line :)
                and not(matches($node/descendant::tei:ref, "unterer Teil"))
                and not(matches($node/descendant::tei:figDesc, "unsicher"))) then
                    element tei:ab {
                        if(matches($node/descendant::tei:ref, "doppelt"))
                            then
                                attribute type {"short-paragraph-line-double"}
                            else
                                attribute type {"short-paragraph-line"}
                    }
                else if($node/parent::tei:zone[@type = "illustration"]) then
                    element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                        $node/@*,
                        (: recurse into the current figure only, not into the
                           children of every node of the input sequence :)
                        fontaneSimple:transform($node/node())
                    }
                else
                    ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:note) return
            if($node/@type = "authorial"
            and not($node/@subtype = "footnote")) then
                ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:certainty) return
            element {QName("http://www.tei-c.org/ns/1.0", "note")}{
                attribute type {"editorial"},
                attribute subtype {"certainty"},
                $node/@cert,
                $node/@target,
                fontaneSimple:transform($node/node())
            }

        case element(tei:figDesc) return
            simpleHelpers:copy-element($node)

        case element(tei:ref) return
            simpleHelpers:copy-element($node)

        case element(tei:space) return
            simpleHelpers:copy-element($node)

        case element(tei:choice) return
            simpleHelpers:copy-element($node)

        case element(tei:abbr) return
            simpleHelpers:copy-element($node)

        case element(tei:expan) return
            simpleHelpers:copy-element($node)

        case element(tei:rs) return
            simpleHelpers:copy-element($node)

        case element(tei:date) return
            (: the *-iso attributes are renamed to their plain counterparts,
               all other attributes are copied unchanged :)
            element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                (if($node/@when-iso) then
                    attribute when {$node/@when-iso}
                else
                    ()),
                (if($node/@from-iso) then
                    attribute from {$node/@from-iso}
                else
                    ()),
                (if($node/@to-iso) then
                    attribute to {$node/@to-iso}
                else
                    ()),
                (if($node/@notAfter-iso) then
                    attribute notAfter {$node/@notAfter-iso}
                else
                    ()),
                $node/(@* except (@when-iso, @to-iso, @from-iso, @notAfter-iso)),
                fontaneSimple:transform($node/node())
            }

        (: NOTE(review): this emits the literal text "test" for every tei:ptr,
           which looks like a placeholder - confirm the intended output. :)
        case element(tei:ptr) return
            text{"test"}

        (: elements without a dedicated branch are unwrapped :)
        default return
            fontaneSimple:transform($node/node())
};