tei2teisimple.xqm 19.4 KB
Newer Older
MRodz's avatar
MRodz committed
1
2
xquery version "3.1";

3
(:~
Michelle Rodzis's avatar
Michelle Rodzis committed
4
5
 : This module handles the conversion of the Fontane-TEI/XML into TEI simplePrint
 : for the edited text. The resulting TEI simplePrint is the basis for the "Edierter
6
 : Text" (edited text) view on the website and the book. It represents the latest
Michelle Rodzis's avatar
Michelle Rodzis committed
7
 : layer of text.
8
 :
MRodz's avatar
MRodz committed
9
10
 : @author Michelle Rodzis
 : @version 0.1
Michelle Rodzis's avatar
Michelle Rodzis committed
11
 : @since TODO
MRodz's avatar
MRodz committed
12
13
 :)

14
module namespace fontaneSimple="http://fontane-nb.dariah.eu/teisimple";
MRodz's avatar
MRodz committed
15

MRodz's avatar
MRodz committed
16

MRodz's avatar
MRodz committed
17
18
19
20
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";

import module namespace console="http://exist-db.org/xquery/console";
21
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
MRodz's avatar
MRodz committed
22

23
(:~
 : The main function initiates the transformation of a given notebook.
 :
 : It opens the notebook below /db/sade-projects/textgrid/data/xml/data/ and
 : splits the tei:surface children of tei:sourceDoc into three groups by their
 : @n attribute: front covers ("front_cover"), back covers ("back_cover") and
 : content (everything whose @n neither contains "cover" nor matches "spine").
 : Each group is converted with fontaneSimple:transform() and wrapped into
 : tei:front, tei:body resp. tei:back. The result is stored as
 : /db/apps/SADE/resources/xml/tei-simple.xml.
 :
 : If the notebook cannot be opened, a message is written to the console and
 : nothing is stored, so that an existing tei-simple.xml is not overwritten by
 : an empty document.
 :
 : TODO: adapt to several input files?
 :
 : @author Michelle Rodzis
 : @param $file the file name of the notebook inside the data collection
 : @return the value returned by xmldb:store() (presumably the path of the
 :         stored resource, to be confirmed against the eXist-db documentation),
 :         or the empty sequence if the notebook could not be opened
 :)
declare function fontaneSimple:main($file as xs:string) as xs:string? {
  let $doc :=
    try {
      doc("/db/sade-projects/textgrid/data/xml/data/" || $file)
    } catch * {
      (: handled below together with the case that doc() yields nothing :)
      ()
    }
  return
    (: Depending on the processor, doc() either raises an error or returns
       the empty sequence for a missing resource. Both cases end up here. :)
    if (empty($doc)) then
      console:log("It was not possible to open the requested file " || $file)
    else
      let $front-covers := $doc//tei:sourceDoc/tei:surface[contains(@n, "front_cover")]
      let $back-covers := $doc//tei:sourceDoc/tei:surface[contains(@n, "back_cover")]
      let $content := $doc//tei:sourceDoc/tei:surface[not(contains(@n, "cover")
          or matches(@n, "spine"))]

      let $tei := <TEI xmlns="http://www.tei-c.org/ns/1.0">
          {$doc//tei:teiHeader}
          <text>
              <front>{fontaneSimple:transform($front-covers)}</front>
              <body>{fontaneSimple:transform($content)}</body>
              <back>{fontaneSimple:transform($back-covers)}</back>
          </text>
        </TEI>
      return xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple.xml", $tei)
};

53
(:~
 : Recursively iterates the passed nodes and converts them according to the
 : requirements for the "Edierter Text". While it covers almost all of the
 : requirements stated in the encoding documentation (c.f.
 : https://fontane-nb.dariah.eu/doku.html), some parts of it are handled in a
 : second step (c.f. TODO) - especially the removal of tei:handShift duplicates
 : and the tei:milestone expansion to tei:div[@type = "section"] resp. tei:p -
 : because it is easier to perform these steps after the XML hierarchy has been
 : flattened a bit.
 :
 : Every node is dispatched by a typeswitch. A branch either drops the node
 : (empty sequence), copies it via simpleHelpers:copy-element(), wraps its
 : converted children into a new TEI element, or simply descends into its
 : children. Elements without a dedicated branch are unwrapped, i.e. only their
 : converted children are returned.
 :
 : @author Michelle Rodzis
 : @param $nodes the elements of the book covers and the book content
 : @return node()* the converted nodes (empty if everything has been dropped)
 :)
declare function fontaneSimple:transform($nodes as node()*) as node()* {
    for $node in $nodes
      return
        typeswitch ($node)
        (: text is only kept inside lines, figure descriptions, edited-text
           descriptions and editorial notes :)
        case text() return
            if($node/ancestor::tei:line
            or $node/ancestor::tei:figDesc
            or $node/ancestor::tei:desc[@type = "edited_text"]
            or $node/ancestor::tei:note[@type = "editorial"]) then
                simpleHelpers:prepare-text($node)
            else
                ()

        case element(tei:lb) return
            if($node[@break = "keepHyphen"]) then
                ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:g) return
            if($node[@ref = "#vds"]) then
                ()
            else if($node/@ref ="#rth" or $node/@ref ="#hb") then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        (: a deletion only contributes content if it has been restored :)
        case element(tei:del) return
            if($node/parent::tei:restore) then
                fontaneSimple:transform($node/node())
            else if($node/descendant::tei:restore) then
                fontaneSimple:transform($node/descendant::tei:restore)
            else
                ()

        case element(tei:restore) return
            if(count($node/child::*) = 1 and $node/child::tei:del
            and $node/ancestor::tei:del) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:retrace) return
            if($node/@rend) then
                element tei:seg {
                    attribute rendition {$node/@rend},
                    fontaneSimple:transform($node/node())
                }
            else
                fontaneSimple:transform($node/node())

        case element(tei:add) return
            if($node/@type = "edited_text"
            or $node/child::tei:seg[@type = "multiphrase"]) then
                simpleHelpers:copy-element($node)
            else if($node/@cause ="catchword" or $node/@cause ="unclear") then
                ()
            else if($node/@rend ="|") then
                fontaneSimple:transform($node/node())
            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())
            else if(simpleHelpers:is-transposed($node)) then
                (: The lookup is anchored at the document of the current node.
                   A path starting with "//" has no context item inside a
                   function body, so its result would depend on the processor.
                   NOTE(review): matches() treats the xml:id as a regular
                   expression and also hits targets that merely contain the
                   id - confirm that this is intended. :)
                let $corresp := root($node)//tei:metamark[matches(@target, $node/@xml:id)]
                return
                    (fontaneSimple:transform($corresp/node()),
                    fontaneSimple:transform($node/node()))
            else
                fontaneSimple:transform($node/node())

        case element(tei:addSpan) return
            if($node/@type = "edited_text") then
                simpleHelpers:copy-element($node)
            else
                ()

        case element(tei:fw) return
            ()

        case element(tei:line) return
            (: a line that consists of a single tei:fw only is dropped :)
            if(count($node/*) = 1 and $node/child::tei:fw) then
                ()
            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@type = "item")) then
                fontaneSimple:transform($node/node())
            else if($node/@type = "item" and not($node/@xml:id)) then
                element tei:item {
                    fontaneSimple:transform($node/node())
                }
            else if($node/@type = "item"
            and simpleHelpers:is-transposed($node)) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:handShift) return
            if($node/@new) then
                simpleHelpers:copy-element($node)
            else
                simpleHelpers:enhance-handshift($node)

        case element(tei:stamp) return
            ()

        case element(tei:seg) return
            (: a seg whose only child is a stamp or a caret is dropped :)
            if(count($node/*) = 1 and
            ($node/child::tei:stamp or $node/child::tei:metamark[@function = "caret"]))
                then
                    ()

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if(matches($node/@style, "underline")
            and not(matches($node/@style, "vertical-align"))) then
                fontaneSimple:transform($node/node())

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if($node/@type = "initials"
            or $node/@type = "monogram"
            or $node/@type = "multiphrase"
            or $node/@xml:lang)
                then
                    simpleHelpers:copy-element($node)

            else if($node/@type = "auction_number"
            or $node/@type = "cancel"
            or $node/@type = "abort"
            or $node/@function ="unknown")
                then
                    ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:hi) return
            simpleHelpers:copy-element($node)

        (: TODO if $node/@type = "highlighted" then make
        a hi[@type = "vertical-mark"] in the second stage of creating the
        simple format. use simpleHelpers:get-xml-chunk($node) for this.:)
        case element(tei:mod) return
            if($node/@type = "highlighted"
            and simpleHelpers:is-hand-contemporary($node/@hand)) then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        case element(tei:anchor) return
            simpleHelpers:copy-element($node)

        case element(tei:surface) return
            if(matches($node/@n, "cover")) then
                simpleHelpers:make-pb-with-type($node/@n)
            else if(simpleHelpers:is-page($node)
            and $node/@type = "clipping") then
                (: clippings always get a page break, but the content of
                   calendar sheets and newspaper fragments is omitted :)
                (simpleHelpers:make-pb($node),
                (if(not($node/@subtype = "Kalenderblatt"
                or $node/@subtype = "Zeitungsausschnitt_Fragment")) then
                    fontaneSimple:transform($node/node())
                else
                    ()))
            else if(simpleHelpers:is-page($node)) then
                (simpleHelpers:make-pb($node),
                fontaneSimple:transform($node/node()))
            else if($node/@type = "label" and
            (contains($node/@subtype, "Fontane")
            or contains($node/@subtype, "Hersteller"))
            ) then
                simpleHelpers:make-div($node)
            else
                ()

        case element(tei:milestone) return
            if($node/@unit = "illustration") then
                ()
(:            else if($node/@unit = "section") then:)
(:                simpleHelpers:make-section($node):)
(:            else if($node/@unit = "paragraph") then:)
(:                fontaneSimple:make-paragraph($node):)
            else
                simpleHelpers:copy-element($node)

        case element(tei:gap) return
            simpleHelpers:copy-element($node)

        case element(tei:metamark) return
            if($node/@function = "integrate"
            or $node/@function = "authorial_note") then
                (: only the attributes are kept, the content is dropped :)
                element tei:ab {
                    $node/@*
                }
            else if($node/@function = "placeholder"
            or $node/@function ="etc."
            or $node/@function ="caret"
            or $node/@function ="footnotes"
            or $node/@function ="ellipsis"
            or $node/@function = "paragraph") then
                element tei:ab {
                    attribute type {$node/@function},
                    fontaneSimple:transform($node/node())
                }
            else
                ()

        (: TODO: check if correct :)
        case element(tei:surplus) return
            ()

        case element(tei:zone) return
            if(matches($node/@style, "border-style:solid")
            and not(matches($node/@style, "border-radius"))
            and not($node/@rend = "border-style:house")) then
                element tei:div {
                    attribute type {"frame"},
                    fontaneSimple:transform($node/node())
                }

            else if(matches($node/@rend, "border-bottom-style:brace")) then
                (fontaneSimple:transform($node/node()),
                element tei:ab {
                    attribute type {"bottom-brace"}
                })

            else if($node/@type = "cancel") then
                ()

            else if($node/@type = "marked_off") then
                element tei:seg {
                    $node/@type,
                    $node/@xml:id,
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "highlighted") then
                (: only the innermost highlighted zone creates the tei:hi :)
                (if($node/child::tei:zone[@type = "highlighted"]) then
                    fontaneSimple:transform($node/node())
                else
                    element tei:hi {
                        attribute type {"vertical-mark"},
                        fontaneSimple:transform($node/node())
                    })

            else if($node/@type = "illustration"
            or $node/@type = "printed_illustration") then
                (: illustrations containing a deleted figure are dropped :)
                (if(not($node//tei:figure/parent::tei:del)) then
                    element {QName("http://www.tei-c.org/ns/1.0", "ab")}{
                        (if($node/child::tei:zone[@type = "illustration"]) then
                            attribute type {"composed-sketch"}
                        else
                            (attribute type {"sketch"},
                            if($node/parent::tei:zone[@type = "illustration"]) then
                                attribute rendition {"margin-left:" || $node/@ulx
                                || "cm; " || "margin-top:" || $node/@uly || "cm"}
                            else
                                ()
                            )
                        ),
                        fontaneSimple:transform($node/node())
                    }
                else
                    ())

            else if($node/parent::tei:zone/@type = "illustration"
            or $node/parent::tei:zone/@type = "printed_illustration") then
                element {QName("http://www.tei-c.org/ns/1.0", "seg")}{
                    attribute type {"caption"},
                    attribute rendition {"margin-left:" || $node/@ulx || "cm; "
                        || "margin-top:" || $node/@uly || "cm"},
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if($node/@type = "list" or $node/@type = "item") then
                (: the value of @type becomes the element name :)
                element {QName("http://www.tei-c.org/ns/1.0", $node/@type)}{
                    $node/(@* except (@type, @ulx, @uly, @lrx, @lry, @rotate)),
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "legend") then
                element {QName("http://www.tei-c.org/ns/1.0", "div")}{
                    (if($node/@style
                    or $node/@rendition) then
                        attribute rendition {simpleHelpers:filter-rendition($node)}
                    else
                        ()),
                    $node/(@* except (@rendition, @style)),
                    fontaneSimple:transform($node/node())
                }

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())

            else if($node/@xml:id and simpleHelpers:is-transposed($node)) then
                ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:figure) return
            if(count($node/child::*) = 1 and $node/child::tei:figDesc) then
                (: genealogy lines probably shouldn't be displayed, but I still have to
                check that. in case they should be serialized, I leave the code :)
(:                if(matches($node/descendant::tei:ref, "Stammbaumverbindungslinie")) then:)
(:                    element tei:seg {:)
(:                        $node/@*,:)
(:                        fontaneSimple:transform($node/node()):)
(:                    }:)
(:                else if(matches($node/descendant::tei:ref, "Schlusslinie")):)
                if(matches($node/descendant::tei:ref, "Schlusslinie"))
                    then
                        element tei:ab {
                            switch ($node/descendant::tei:ref)
                                case "horizontale einfache Schlusslinie" return
                                    attribute type {"long-end-line"}
                                case "Schlusslinie; horizontale Halbschleife von links oben nach rechts" return
                                    attribute type {"long-end-line"}
                                case "horizontale einfache Schlusslinie (gewellt)" return
                                    attribute type {"long-end-line-wavy"}
                                case "Schlusslinien; horizontale Schleife von links oben nach rechts unten" return
                                    attribute type {"bottom-brace-short"}
                                default return
                                    attribute type {"end-line"}
                        }
                else if(matches($node/descendant::tei:ref, "Absatzlinie")
                (: in case of double paragraph lines the single lines are
                encoded with "oberer" resp. "unterer Teil", but we only
                serialize the encoding for the upper line :)
                and not(matches($node/descendant::tei:ref, "unterer Teil"))
                and not(matches($node/descendant::tei:figDesc, "unsicher"))) then
                    element tei:ab {
                        if(matches($node/descendant::tei:ref, "doppelt"))
                            then
                                attribute type {"short-paragraph-line-double"}
                            else
                                attribute type {"short-paragraph-line"}
                    }
                else if($node/parent::tei:zone[@type = "illustration"]) then
                    element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                        $node/@*,
                        (: descend into the children of the current figure
                           only, not into those of all passed nodes :)
                        fontaneSimple:transform($node/node())
                    }
                else
                    ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:note) return
            if($node/@type = "authorial"
            and not($node/@subtype = "footnote")) then
                ()
            else
                simpleHelpers:copy-element($node)

        (: tei:certainty is turned into an editorial note :)
        case element(tei:certainty) return
            element {QName("http://www.tei-c.org/ns/1.0", "note")}{
                attribute type {"editorial"},
                attribute subtype {"certainty"},
                $node/@cert,
                $node/@target,
                fontaneSimple:transform($node/node())
            }

        case element(tei:figDesc) return
            simpleHelpers:copy-element($node)

        case element(tei:ref) return
            simpleHelpers:copy-element($node)

        case element(tei:space) return
            simpleHelpers:copy-element($node)

        case element(tei:choice) return
            simpleHelpers:copy-element($node)

        case element(tei:abbr) return
            simpleHelpers:copy-element($node)

        case element(tei:expan) return
            simpleHelpers:copy-element($node)

        case element(tei:rs) return
            simpleHelpers:copy-element($node)

        (: the *-iso attributes are renamed to their plain counterparts, all
           other attributes are kept :)
        case element(tei:date) return
            element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                (if($node/@when-iso) then
                    attribute when {$node/@when-iso}
                else
                    ()),
                (if($node/@from-iso) then
                    attribute from {$node/@from-iso}
                else
                    ()),
                (if($node/@to-iso) then
                    attribute to {$node/@to-iso}
                else
                    ()),
                (if($node/@notAfter-iso) then
                    attribute notAfter {$node/@notAfter-iso}
                else
                    ()),
                $node/(@* except (@when-iso, @to-iso, @from-iso, @notAfter-iso)),
                fontaneSimple:transform($node/node())
            }

        (: NOTE(review): every tei:ptr is replaced by the literal text "test",
           which looks like a placeholder - confirm the intended output. :)
        case element(tei:ptr) return
            text{"test"}

        (: elements without a dedicated rule are unwrapped :)
        default return
            fontaneSimple:transform($node/node())
};