tei2teisimple.xqm 19.7 KB
Newer Older
MRodz's avatar
MRodz committed
1
2
xquery version "3.1";

3
(:~
Michelle Rodzis's avatar
Michelle Rodzis committed
4
5
 : This module handles the conversion of the Fontane-TEI/XML into TEI simplePrint
 : for the edited text. The resulting TEI simplePrint is the basis for the "Edierter
6
 : Text" (edited text) view on the website and the book. It represents the latest
Michelle Rodzis's avatar
Michelle Rodzis committed
7
 : layer of text.
8
 :
MRodz's avatar
MRodz committed
9
10
 : @author Michelle Rodzis
 : @version 0.1
Michelle Rodzis's avatar
Michelle Rodzis committed
11
 : @since TODO
MRodz's avatar
MRodz committed
12
13
 :)

14
module namespace fontaneSimple="http://fontane-nb.dariah.eu/teisimple";
MRodz's avatar
MRodz committed
15

MRodz's avatar
MRodz committed
16

MRodz's avatar
MRodz committed
17
18
19
20
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare namespace test="http://exist-db.org/xquery/xqsuite";

import module namespace console="http://exist-db.org/xquery/console";
21
import module namespace simpleHelpers="http://fontane-nb.dariah.eu/teisimplehelpers" at "teisimplehelpers.xqm";
MRodz's avatar
MRodz committed
22

23
(:~
 : The main function initiates the transformation of a given notebook.
 :
 : The notebook is read from "/db/sade-projects/textgrid/data/xml/data/". Its
 : tei:surface elements are split by their @n attribute into front covers,
 : back covers and content (everything that is neither a cover nor the spine).
 : Each group is passed to fontaneSimple:transform#1 and wrapped in tei:front,
 : tei:body and tei:back respectively. The original tei:teiHeader is copied
 : unchanged. The result is stored as "tei-simple.xml" in
 : "/db/apps/SADE/resources/xml/".
 :
 : If the notebook cannot be opened, a message is written to the console and
 : nothing is stored, so that an existing "tei-simple.xml" is not overwritten
 : with an empty skeleton.
 :
 : TODO: adapt to several input files?
 :
 : @param $file the resource name of the notebook, relative to the data collection
 : @return the value returned by xmldb:store#3, or the empty sequence if the
 :         notebook could not be opened
 :)
declare function fontaneSimple:main($file as xs:string) as xs:string? {
    let $doc :=
        try {
            doc("/db/sade-projects/textgrid/data/xml/data/" || $file)
        } catch * {
            ()
        }
    return
        (: doc() may yield nothing instead of raising an error, so the missing
           document is handled in one place for both situations :)
        if (empty($doc)) then
            console:log("It was not possible to open the requested file " || $file)
        else
            let $surfaces := $doc//tei:sourceDoc/tei:surface
            let $front-covers := $surfaces[contains(@n, "front_cover")]
            let $back-covers := $surfaces[contains(@n, "back_cover")]
            let $content := $surfaces[not(contains(@n, "cover")
                or matches(@n, "spine"))]

            let $tei := <TEI xmlns="http://www.tei-c.org/ns/1.0">
                {$doc//tei:teiHeader}
                <text>
                    <front>{fontaneSimple:transform($front-covers)}</front>
                    <body>{fontaneSimple:transform($content)}</body>
                    <back>{fontaneSimple:transform($back-covers)}</back>
                </text>
              </TEI>
            return xmldb:store("/db/apps/SADE/resources/xml/", "tei-simple.xml", $tei)
};

53
(:~
 : Recursively iterates the passed nodes and converts them according to the
 : requirements for the "Edierter Text". While it covers almost all of the
 : requirements stated in the encoding documentation (cf.
 : https://fontane-nb.dariah.eu/doku.html), some parts of it are handled in a
 : second step (cf. TODO) - especially the removal of tei:handShift duplicates
 : and the tei:milestone expansion to tei:div[@type = "section"] resp. tei:p -
 : because it is easier to perform these steps after the XML hierarchy has been
 : flattened a bit.
 :
 : Each node is dispatched by a typeswitch. Depending on the element and its
 : attributes a node is either dropped (empty sequence), copied via
 : simpleHelpers:copy-element#1, replaced by a newly constructed TEI element,
 : or unwrapped, i.e. only its children are processed recursively. Elements
 : without a dedicated case are unwrapped.
 :
 : @author Michelle Rodzis
 : @param $nodes the elements of the book covers and the book content
 : @return the converted nodes (TEI simplePrint elements and text), possibly empty
 :)
declare function fontaneSimple:transform($nodes as node()*) as node()* {
    for $node in $nodes
      return
        typeswitch ($node)
        case text() return
            simpleHelpers:prepare-text($node)

        case element(tei:lb) return
            if($node[@break = "keepHyphen"]) then
                ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:g) return
            if($node[@ref = "#vds"]) then
                ()
            else if($node/@ref ="#rth" or $node/@ref ="#hb") then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        (: deletions are dropped unless they have been restored :)
        case element(tei:del) return
            if($node/parent::tei:restore) then
                fontaneSimple:transform($node/node())
            else if($node/descendant::tei:restore) then
                fontaneSimple:transform($node/descendant::tei:restore)
            else
                ()

        case element(tei:restore) return
            if(count($node/child::*) = 1 and $node/child::tei:del
            and $node/ancestor::tei:del) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:retrace) return
            if($node/@rend) then
                element tei:seg {
                    attribute rendition {$node/@rend},
                    fontaneSimple:transform($node/node())
                }
            else
                fontaneSimple:transform($node/node())

        case element(tei:add) return
            if($node/@type = "edited_text"
            or $node/child::tei:seg[@type = "multiphrase"]) then
                simpleHelpers:copy-element($node)
            else if($node/@cause ="catchword" or $node/@cause ="unclear") then
                ()
            else if($node/@rend ="|") then
                fontaneSimple:transform($node/node())
            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())
            else if(simpleHelpers:is-transposed($node)) then
                (: look up the metamark in the document the current node
                belongs to; a bare "//" has no context item inside a function :)
                let $corresp := root($node)//tei:metamark[matches(@target, $node/@xml:id)]
                return
                    (fontaneSimple:transform($corresp/node()),
                    fontaneSimple:transform($node/node()))
            else
                fontaneSimple:transform($node/node())

        case element(tei:addSpan) return
            if($node/@type = "edited_text") then
                simpleHelpers:copy-element($node)
            else
                ()

        case element(tei:fw) return
            ()

        case element(tei:line) return
            (: lines that only consist of a tei:fw are dropped like tei:fw itself :)
            if(count($node/*) = 1 and $node/child::tei:fw) then
                ()
            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@type = "item")) then
                fontaneSimple:transform($node/node())
            else if($node/@type = "item" and not($node/@xml:id)) then
                element tei:item {
                    fontaneSimple:transform($node/node())
                }
            else if($node/@type = "item"
            and simpleHelpers:is-transposed($node)) then
                ()
            else
                fontaneSimple:transform($node/node())

        case element(tei:handShift) return
            if($node/@new) then
                if(simpleHelpers:is-hand-valid($node)
                and not(simpleHelpers:is-prev-valid-hand-same($node))) then
                    simpleHelpers:copy-element($node)
                else
                    ()
            else
                (: tei:handShift elements without @new denote the ductus of the
                current writer or a change of writing medium; they are only
                kept if the closest preceding tei:handShift with @new is valid :)
                if(simpleHelpers:is-hand-valid($node/preceding::tei:handShift[@new][1])) then
                    simpleHelpers:copy-element($node)
                else
                    ()

        case element(tei:stamp) return
            ()

        case element(tei:seg) return
            (: segments that only wrap a stamp or a caret mark are dropped :)
            if(count($node/*) = 1 and
            ($node/child::tei:stamp or $node/child::tei:metamark[@function = "caret"]))
                then
                    ()

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if(matches($node/@style, "underline")
            and not(matches($node/@style, "vertical-align"))) then
                fontaneSimple:transform($node/node())

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if($node/@type = "initials"
            or $node/@type = "monogram"
            or $node/@type = "multiphrase"
            or $node/@xml:lang)
                then
                    simpleHelpers:copy-element($node)

            else if($node/@type = "auction_number"
            or $node/@type = "cancel"
            or $node/@type = "abort"
            or $node/@function ="unknown")
                then
                    ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:hi) return
            simpleHelpers:copy-element($node)

        (: TODO if $node/@type = "highlighted" then make
        a hi[@type = "vertical-mark"] in the second stage of creating the
        simple format. use simpleHelpers:get-xml-chunk($node) for this.:)
        case element(tei:mod) return
            if($node/@type = "highlighted"
            and simpleHelpers:is-hand-contemporary($node/@hand)) then
                simpleHelpers:copy-element($node)
            else
                fontaneSimple:transform($node/node())

        case element(tei:anchor) return
            simpleHelpers:copy-element($node)

        case element(tei:surface) return
            (: covers only yield a typed page break; their content is not processed here :)
            if(matches($node/@n, "cover")) then
                simpleHelpers:make-pb-with-type($node/@n)
            else if(simpleHelpers:is-page($node)
            and $node/@type = "clipping") then
                (simpleHelpers:make-pb($node),
                (if(not($node/@subtype = "Kalenderblatt"
                or $node/@subtype = "Zeitungsausschnitt_Fragment")) then
                    fontaneSimple:transform($node/node())
                else
                    ()))
            else if(simpleHelpers:is-page($node)) then
                (simpleHelpers:make-pb($node),
                fontaneSimple:transform($node/node()))
            else if($node/@type = "label" and
            (contains($node/@subtype, "Fontane")
            or contains($node/@subtype, "Hersteller"))
            ) then
                simpleHelpers:make-div($node)
            else
                ()

        case element(tei:milestone) return
            if($node/@unit = "illustration") then
                ()
(:            else if($node/@unit = "section") then:)
(:                simpleHelpers:make-section($node):)
(:            else if($node/@unit = "paragraph") then:)
(:                fontaneSimple:make-paragraph($node):)
            else
                simpleHelpers:copy-element($node)

        case element(tei:gap) return
            simpleHelpers:copy-element($node)

        case element(tei:metamark) return
            if($node/@function = "integrate"
            or $node/@function = "authorial_note") then
                element tei:ab {
                    $node/@*
                }
            else if($node/@function = "placeholder"
            or $node/@function ="etc."
            or $node/@function ="caret"
            or $node/@function ="footnotes"
            or $node/@function ="ellipsis"
            or $node/@function = "paragraph") then
                element tei:ab {
                    attribute type {$node/@function},
                    fontaneSimple:transform($node/node())
                }
            else
                ()

        (: TODO: check if correct :)
        case element(tei:surplus) return
            ()

        case element(tei:zone) return
            if(matches($node/@style, "border-style:solid")
            and not(matches($node/@style, "border-radius"))
            and not($node/@rend = "border-style:house")) then
                element tei:div {
                    attribute type {"frame"},
                    fontaneSimple:transform($node/node())
                }

            else if(matches($node/@rend, "border-bottom-style:brace")) then
                (fontaneSimple:transform($node/node()),
                element tei:ab {
                    attribute type {"bottom-brace"}
                })

            else if($node/@type = "cancel") then
                ()

            else if($node/@type = "marked_off") then
                element tei:seg {
                    $node/@type,
                    $node/@xml:id,
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "highlighted") then
                (: nested highlighted zones: only the innermost one becomes a tei:hi :)
                (if($node/child::tei:zone[@type = "highlighted"]) then
                    fontaneSimple:transform($node/node())
                else
                    element tei:hi {
                        attribute type {"vertical-mark"},
                        fontaneSimple:transform($node/node())
                    })

            else if($node/@type = "illustration"
            or $node/@type = "printed_illustration") then
                (: illustrations containing a deleted figure are dropped :)
                (if(not($node//tei:figure/parent::tei:del)) then
                    element {QName("http://www.tei-c.org/ns/1.0", "ab")}{
                        (if($node/child::tei:zone[@type = "illustration"]) then
                            attribute type {"composed-sketch"}
                        else
                            (attribute type {"sketch"},
                            if($node/parent::tei:zone[@type = "illustration"]) then
                                attribute rendition {"margin-left:" || $node/@ulx
                                || "cm; " || "margin-top:" || $node/@uly || "cm"}
                            else
                                ()
                            )
                        ),
                        fontaneSimple:transform($node/node())
                    }
                else
                    ())

            else if($node/parent::tei:zone/@type = "illustration"
            or $node/parent::tei:zone/@type = "printed_illustration") then
                element {QName("http://www.tei-c.org/ns/1.0", "seg")}{
                    attribute type {"caption"},
                    attribute rendition {"margin-left:" || $node/@ulx || "cm; "
                        || "margin-top:" || $node/@uly || "cm"},
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "heading") then
                simpleHelpers:make-head($node)

            else if($node/@type = "list" or $node/@type = "item") then
                (: the value of @type ("list" or "item") becomes the element name :)
                element {QName("http://www.tei-c.org/ns/1.0", $node/@type)}{
                    $node/(@* except (@type, @ulx, @uly, @lrx, @lry, @rotate)),
                    fontaneSimple:transform($node/node())
                }

            else if($node/@type = "legend") then
                element {QName("http://www.tei-c.org/ns/1.0", "div")}{
                    (if($node/@style
                    or $node/@rendition) then
                        attribute rendition {simpleHelpers:filter-rendition($node)}
                    else
                        ()),
                    $node/(@* except (@rendition, @style)),
                    fontaneSimple:transform($node/node())
                }

            else if((simpleHelpers:has-valid-style($node)
            or matches($node/@rendition, "black_letter")
            or matches($node/@rendition, "roman"))
            and simpleHelpers:has-valid-text($node)) then
                simpleHelpers:make-seg-with-rendition($node)

            else if(not($node/@xml:id)) then
                fontaneSimple:transform($node/node())

            else if($node/@xml:id and simpleHelpers:is-transposed($node)) then
                ()

            else
                fontaneSimple:transform($node/node())

        case element(tei:figure) return
            if(count($node/child::*) = 1 and $node/child::tei:figDesc) then
                (: genealogy lines probably shouldn't be displayed, but I still have to
                check that. in case they should be serialized, I leave the code :)
(:                if(matches($node/descendant::tei:ref, "Stammbaumverbindungslinie")) then:)
(:                    element tei:seg {:)
(:                        $node/@*,:)
(:                        fontaneSimple:transform($node/node()):)
(:                    }:)
(:                else if(matches($node/descendant::tei:ref, "Schlusslinie")):)
                if(matches($node/descendant::tei:ref, "Schlusslinie"))
                    then
                        element tei:ab {
                            switch ($node/descendant::tei:ref)
                                case "horizontale einfache Schlusslinie" return
                                    attribute type {"long-end-line"}
                                case "Schlusslinie; horizontale Halbschleife von links oben nach rechts" return
                                    attribute type {"long-end-line"}
                                case "horizontale einfache Schlusslinie (gewellt)" return
                                    attribute type {"long-end-line-wavy"}
                                case "Schlusslinien; horizontale Schleife von links oben nach rechts unten" return
                                    attribute type {"bottom-brace-short"}
                                default return
                                    attribute type {"end-line"}
                        }
                else if(matches($node/descendant::tei:ref, "Absatzlinie")
                (: in case of double paragraph lines the single lines are
                encoded with "oberer" resp. "unterer Teil", but we only
                serialize the encoding for the upper line :)
                and not(matches($node/descendant::tei:ref, "unterer Teil"))
                and not(matches($node/descendant::tei:figDesc, "unsicher"))) then
                    element tei:ab {
                        if(matches($node/descendant::tei:ref, "doppelt"))
                            then
                                attribute type {"short-paragraph-line-double"}
                            else
                                attribute type {"short-paragraph-line"}
                    }
                else if($node/parent::tei:zone[@type = "illustration"]) then
                    element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                        $node/@*,
                        (: only the children of the current figure, not those
                        of every node in the input sequence :)
                        fontaneSimple:transform($node/node())
                    }
                else
                    ()
            else
                simpleHelpers:copy-element($node)

        case element(tei:note) return
            if($node/@type = "authorial"
            and not($node/@subtype = "footnote")) then
                ()
            else
                simpleHelpers:copy-element($node)

        (: tei:certainty is turned into an editorial note that keeps @cert and @target :)
        case element(tei:certainty) return
            element {QName("http://www.tei-c.org/ns/1.0", "note")}{
                attribute type {"editorial"},
                attribute subtype {"certainty"},
                $node/@cert,
                $node/@target,
                fontaneSimple:transform($node/node())
            }

        case element(tei:figDesc) return
            simpleHelpers:copy-element($node)

        case element(tei:ref) return
            simpleHelpers:copy-element($node)

        case element(tei:space) return
            simpleHelpers:copy-element($node)

        case element(tei:choice) return
            simpleHelpers:copy-element($node)

        case element(tei:abbr) return
            simpleHelpers:copy-element($node)

        case element(tei:expan) return
            simpleHelpers:copy-element($node)

        case element(tei:rs) return
            simpleHelpers:copy-element($node)

        (: the *-iso dating attributes are renamed to their plain counterparts.
        NOTE(review): @notBefore-iso is not renamed and is copied as is -
        confirm whether this is intended :)
        case element(tei:date) return
            element {QName("http://www.tei-c.org/ns/1.0", $node/name())}{
                (if($node/@when-iso) then
                    attribute when {$node/@when-iso}
                else
                    ()),
                (if($node/@from-iso) then
                    attribute from {$node/@from-iso}
                else
                    ()),
                (if($node/@to-iso) then
                    attribute to {$node/@to-iso}
                else
                    ()),
                (if($node/@notAfter-iso) then
                    attribute notAfter {$node/@notAfter-iso}
                else
                    ()),
                $node/(@* except (@when-iso, @to-iso, @from-iso, @notAfter-iso)),
                fontaneSimple:transform($node/node())
            }

        (: NOTE(review): every tei:ptr is replaced by the literal text "test";
        this looks like a placeholder - confirm the intended output :)
        case element(tei:ptr) return
            text{"test"}

        (: all other nodes are unwrapped: only their children are processed :)
        default return
            fontaneSimple:transform($node/node())
};