<?xml version="1.0" encoding="utf-8"?>

Dennis Neumann's avatar
Dennis Neumann committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
<!-- 

This script produces Solr XML documents.


Field 'fulltext_html'

This field contains the HTML representation of the text of a TEI document (e.g. a Goethe letter).
The Goethe letters are composed of different parts, for example 'opener', 'closer', 'salute'.
All those parts are represented here as <div>'s with the corresponding CSS classes.
The frontend viewer must decide how to format those parts and present them to the user.

Also, the original TEI files contain mark-up for many in-text parts, like dates, names, underlined words, etc.
Most of these are also transformed to <div>'s with their own CSS classes.
Although the in-text parts are by nature inline elements, we use <div>s here rather than <span>s.
The reason is that Solr seems to have problems when highlighting fields that contain <span>s:
it sometimes produces corrupt HTML.
By using <div>s, we avoid this problem.
In the frontend, these <div>s must be set to 'display: inline'.

Some other in-text parts are transformed to special HTML elements.
For example, superscripted text is marked as <sup>, because HTML offers the appropriate element.

The project is still continuing and new TEI files are being produced.
That's why there might be new elements in the future that cannot be handled yet in this script.
The text of such TEI elements is enclosed in HTML elements of class 'unknown-element'.
Furthermore, a warning message is generated that contains data of the first occurrence of such a new element.






 -->

Dennis Neumann's avatar
Dennis Neumann committed
38
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
   xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:gfl="http://sub.gfl.de"
   xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:saxon="http://saxon.sf.net/" exclude-result-prefixes="gfl saxon xs">

   <!-- Serialize as indented XML; the Saxon extension attribute keeps <div> content from being re-indented. -->
   <xsl:output method="xml" indent="yes" saxon:suppress-indentation="div" />
   <!-- Whitespace-only text nodes are stripped everywhere except in the elements listed below. -->
   <xsl:strip-space elements="*" />
   <xsl:preserve-space elements="msIdentifier bibl p" />
45
46
47
48

   <!-- Entry point: wraps everything produced for the TEI root element in <add><doc>. -->
   <xsl:template match="/">
      <add>
         <doc>
            <xsl:apply-templates select="TEI" />
         </doc>
      </add>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
53

Dennis Neumann's avatar
Dennis Neumann committed
54
55
56
57
   <!-- Processes the header and the text part of the TEI document, in document order. -->
   <xsl:template match="TEI">
      <xsl:apply-templates select="*[self::teiHeader or self::text]" />
   </xsl:template>

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
   <!-- Text nodes of the HTML view: every whitespace run is collapsed to a single space.
        If the original text node ends with a soft hyphen (U+00AD), all soft hyphens in it
        are replaced by a minus sign. -->
   <xsl:template match="text()" mode="html_for_whole_article">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of
         select="if (ends-with(., '&#0173;')) then replace($collapsed, '&#0173;', '-') else $collapsed" />
   </xsl:template>
   
   <!-- Text nodes of the plain-text view: whitespace runs are collapsed to a single space,
        then trailing hyphenation marks are handled (see the branches below). -->
   <xsl:template match="text()" mode="text_only">
      <xsl:variable name="currentText" select="replace(., '\s+', ' ')" />
      <xsl:choose>
         <xsl:when test="ends-with(., '-')">
            <!-- These are cases where one word is divided between two lines. 
            The minus sign is removed here, the line break (<lb/>) is removed in its own template. -->
            <xsl:value-of select="substring($currentText, 1, string-length($currentText)-1)" />
         </xsl:when>
         <xsl:when test="ends-with(., '&#0173;')">
            <!-- A soft hyphen is a convention to mark a hyphen that belongs to the word or is a hyphen on its own.
            For now, it is just replaced by a minus sign. 
            Later, it might be useful to differentiate between word divisions and hyphens. -->
            <xsl:value-of select="replace($currentText, '&#0173;', '-')" />
         </xsl:when>
         <xsl:otherwise>
            <xsl:value-of select="$currentText" />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>
   
   <!--###########   Header   #######################-->
   
   <!-- Header: only the file description and the text classification are indexed. -->
   <xsl:template match="teiHeader">
      <xsl:apply-templates select="fileDesc | profileDesc/textClass" />
   </xsl:template>
   
   <!-- Dispatches the parts of the file description that become Solr fields:
        titles, the names and origin date inside the descriptive title, the author and the source description.
        NOTE(review): selected elements that match none of the typed templates of this stylesheet
        are handled by the built-in template rules, which copy their text to the output. -->
   <xsl:template match="fileDesc">
      <xsl:apply-templates select="titleStmt/title" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/name" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/date[@type='orn']" />
      <xsl:apply-templates select="titleStmt/author/name" />
      <xsl:apply-templates select="sourceDesc" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
104
105
106
107
   <!-- Hands every keyword term of the text classification to the keyword field templates. -->
   <xsl:template match="profileDesc/textClass">
      <xsl:for-each select="keywords">
         <xsl:apply-templates select="term" />
      </xsl:for-each>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
108
   <!-- Short title as plain text. -->
   <xsl:template match="title[@type='short']">
      <field name="short_title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
114
   <!-- Descriptive title as plain text. -->
   <xsl:template match="title[@type='desc']">
      <field name="title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
120
   <!-- Place name with subtype 'orn' inside a title: indexed as the origin place. -->
   <xsl:template match="title/name[@type='place' and @subtype='orn']">
      <field name="origin_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
126
   <!-- Place name with subtype 'dtn' inside a title: indexed as the destination place. -->
   <xsl:template match="title/name[@type='place' and @subtype='dtn']">
      <field name="destination_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
132
   <!-- Person name with subtype 'rcp' inside a title: indexed as the recipient. -->
   <xsl:template match="title/name[@type='person' and @subtype='rcp']">
      <field name="recipient">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
138
   <!-- Origin date: the value of the @when attribute is indexed, not the element text. -->
   <xsl:template match="title/date[@type='orn']">
      <field name="origin_date">
         <xsl:value-of select="@when" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
144
   <!-- Person name with subtype 'aut' inside the author element: indexed as the author. -->
   <xsl:template match="author/name[@type='person' and @subtype='aut']">
      <field name="author">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>
149
150
151
152
153
154
   
   <!-- The whole source description is flattened to plain text in one field. -->
   <xsl:template match="sourceDesc">
      <field name="source_description">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
155

Dennis Neumann's avatar
Dennis Neumann committed
156
157
158
159
160
161
162
163
164
165
166
167
168
   <!-- One field per term of a keyword list whose scheme is '#gnd'. -->
   <xsl:template match="textClass/keywords[@scheme='#gnd']/term">
      <field name="gnd_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

   <!-- One field per term of a keyword list whose scheme is 'free'. -->
   <xsl:template match="textClass/keywords[@scheme='free']/term">
      <field name="free_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>


169

Dennis Neumann's avatar
Dennis Neumann committed
170
171
172
173
174
175
176
   <!--###################   text/body   ##########################-->

   <!-- Produces the fields derived from the <text> element:
        the id, the plain full text, the HTML full text and one field per comment note. -->
   <xsl:template match="text">
      <field name="id">
         <xsl:value-of select="@xml:id" />
      </field>
      <field name="fulltext">
         <xsl:apply-templates select="body" mode="text_only" />
      </field>
      <field name="fulltext_html">
         <!-- The HTML markup is wrapped in a CDATA section that is written by hand
              with output escaping switched off for the two delimiters. -->
         <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
            <div class="article">
               <xsl:apply-templates mode="html_for_whole_article" />
            </div>
         <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
      </field>
      <xsl:apply-templates select=".//note[@type='com']" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
188
189
   
   <!-- Structural containers contribute nothing themselves; only their content is processed. -->
   <xsl:template match="body | div" mode="text_only">
      <xsl:apply-templates mode="text_only"/>
   </xsl:template>

193
   <!-- Block-like parts: a space is appended so that the text of adjacent parts does not run together. -->
   <xsl:template match="p | salute | signed | addrLine | dateline" mode="text_only">
      <xsl:apply-templates mode="text_only" />
      <xsl:text> </xsl:text>
   </xsl:template>
197

198
199
200
201
202
   <!-- End notes are separated from the preceding text by a single space. -->
   <xsl:template match="note[@place='end']" mode="text_only">
      <xsl:value-of select="' '" />
      <xsl:apply-templates select="node()" mode="text_only" />
   </xsl:template>

203
   <!-- Line breaks in the plain-text view: normally a space, but nothing when the text directly
        before the break ends with a hyphenation mark, so that the divided word is joined again. -->
   <xsl:template match="lb" mode="text_only">
      <!-- Last text node of the node that immediately precedes the line break.
           Looking only at the nearest preceding text sibling would skip over an element
           standing between that text and the break and test the wrong text. -->
      <xsl:variable name="precedingText"
         select="(preceding-sibling::node()[1]/descendant-or-self::text())[last()]" />
      <xsl:choose>
         <xsl:when test="ends-with($precedingText, '-')">
            <!-- Cases where a word is divided between two lines -->
            <!-- no output -->
         </xsl:when>
         <xsl:when test="ends-with($precedingText, '&#0173;') and not(ends-with($precedingText, ' &#0173;'))">
            <!-- Cases where the hyphen belongs to the word (Anna-<lb/>Lena) -->
            <!-- no output -->
         </xsl:when>
         <xsl:otherwise>
            <xsl:text> </xsl:text>
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
219

220
221
222
223
224
225
226
227
   <!-- A page break is represented by a single space in the plain text. -->
   <xsl:template match="pb" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- A <space> element is represented by a single space in the plain text. -->
   <xsl:template match="space" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

228
   <!-- Comment notes are left out of the plain text on purpose: this template produces no output.
        They are written to their own field by the template for note[@type='com'] in the default mode. -->
   <xsl:template match="note[@type='com']" mode="text_only">
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
230
   
Dennis Neumann's avatar
Dennis Neumann committed
231
232
233
234
235
236
   <!-- Each comment note becomes its own plain-text field. -->
   <xsl:template match="note[@type='com']">
      <field name="note_comment">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
237
238
   
   <!-- ++++++++++++ HTML +++++++++++++++++++ -->
239
240

   <!-- Fallback for elements without a dedicated template in the HTML view:
        reports the element name with its @rendition and @type (if present) via xsl:message
        and still outputs the element content, marked with the class 'unknown-element'.
        The message text is kept exactly as it was, including the trailing 'first occurrence: ' label.
        NOTE(review): this is the only place that emits a <span>, although the header comment of this
        file advises against <span> in this field; confirm whether that is intended. -->
   <xsl:template match="*" mode="html_for_whole_article">
      <!--xsl:if test=".//text()"-->
         <xsl:message>
            <xsl:text>Unknown element &lt;</xsl:text>
            <xsl:value-of select="local-name()" />
            <xsl:if test="@rendition">
               <xsl:text> rendition="</xsl:text>
               <xsl:value-of select="@rendition" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:if test="@type">
               <xsl:text> type="</xsl:text>
               <xsl:value-of select="@type" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:text>&gt; - first occurrence: </xsl:text>
         </xsl:message>
         <span class="unknown-element">
            <xsl:apply-templates mode="html_for_whole_article" />
         </span>
      <!--/xsl:if-->
   </xsl:template>
   
   <!-- Structural containers add no markup of their own in the HTML view. -->
   <xsl:template match="div | body" mode="html_for_whole_article">
      <xsl:apply-templates select="node()" mode="html_for_whole_article" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
266
   
267
   <!-- Generic mapping: each of these elements becomes a <div> whose CSS class is the element name. -->
   <xsl:template match="p | opener | salute | seg | bibl | closer | signed | dateline | date
   | label[not(@rendition)] | choice | abbr | expan | postscript" mode="html_for_whole_article">
      <div class="{local-name()}">
         <xsl:apply-templates mode="html_for_whole_article" />
      </div>
   </xsl:template>

   <!-- A TEI line break becomes an HTML line break. -->
   <xsl:template match="lb" mode="html_for_whole_article">
      <xsl:element name="br" />
   </xsl:template>
277

278
279
   <!-- Vertical space: outputs one <br/> per empty line given in @quantity.
        If @quantity is missing or not an integer, a single <br/> is output instead. -->
   <xsl:template match="space[@unit='lines']" mode="html_for_whole_article">
      <xsl:variable name="emptyLines" select="@quantity" />
      <xsl:choose>
         <xsl:when test="$emptyLines castable as xs:integer">
            <xsl:for-each select="1 to $emptyLines">
               <br />
            </xsl:for-each>
         </xsl:when>
         <xsl:otherwise>
            <br />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>

   <!-- Place names are wrapped in <div class="place">. -->
   <xsl:template mode="html_for_whole_article" match="name[@type='place']">
      <div class="place">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
298
299
300
301
302
303
304
305
306
307
308
309
   <!-- Organisation names are wrapped in <div class="org">. -->
   <xsl:template mode="html_for_whole_article" match="name[@type='org']">
      <div class="org">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Person names are wrapped in <div class="person">. -->
   <xsl:template mode="html_for_whole_article" match="name[@type='person']">
      <div class="person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
310
311
312
313
314
315
   <!-- Object names are wrapped in <div class="object">. -->
   <xsl:template mode="html_for_whole_article" match="name[@type='object']">
      <div class="object">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
316
317
   <!-- Page break in the HTML view: a <div class="page-break"> that shows the page number (@n).
        When the element referenced by @facs has a @url, the page number links to that URL
        (with a trailing '.jpg' removed); without @n only a space is output. -->
   <xsl:template match="pb" mode="html_for_whole_article">
      <div class="page-break">
         <!-- The first character of @facs is dropped to obtain the id of the referenced element. -->
         <xsl:variable name="facsId" select="substring(@facs, 2)" />
         <xsl:variable name="graphicUrl" select="id($facsId)/@url" />
         <!-- Declared as a string on purpose: a variable with a content body is a temporary tree,
              which is always true in a boolean test, so the link branch was taken even without a URL. -->
         <xsl:variable name="graphicUrlWithoutJpg" as="xs:string"
            select="if (ends-with($graphicUrl, '.jpg'))
                    then substring($graphicUrl, 1, string-length($graphicUrl) - 4)
                    else string($graphicUrl)" />

         <xsl:choose>
            <xsl:when test="@n ne '' and $graphicUrlWithoutJpg ne ''">
               <a href="{concat('/', $graphicUrlWithoutJpg)}" target="_blank">
                  <xsl:value-of select="@n" />
               </a>
            </xsl:when>
            <xsl:when test="@n ne ''">
               <xsl:value-of select="@n" />
            </xsl:when>
            <xsl:otherwise>
               <xsl:text> </xsl:text>
            </xsl:otherwise>
         </xsl:choose>
      </div>
   </xsl:template>
   
340
341
342
343
344
345
   <!-- Referring strings of type 'person' are wrapped in <div class="rs-person">. -->
   <xsl:template mode="html_for_whole_article" match="rs[@type='person']">
      <div class="rs-person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
346
347
348
349
350
351
   <!-- Referring strings of type 'place' are wrapped in <div class="rs-place">. -->
   <xsl:template mode="html_for_whole_article" match="rs[@type='place']">
      <div class="rs-place">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
      
352
353
354
355
356
   <!-- Comment notes are wrapped in <div class="note-comment">. -->
   <xsl:template mode="html_for_whole_article" match="note[@type='com']">
      <div class="note-comment">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
357
358
359
360
361
362
363
364
365
366
367
368
   
   <!-- Footnotes are wrapped in <div class="note-footnote">. -->
   <xsl:template mode="html_for_whole_article" match="note[@type='footnote']">
      <div class="note-footnote">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Notes without a @type attribute are wrapped in <div class="note">. -->
   <xsl:template mode="html_for_whole_article" match="note[not(@type)]">
      <div class="note">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
369
370
371
372
373
374
375
   
   <!-- Underlined text is wrapped in <div class="underline">. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:underline']">
      <div class="underline">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
376
377
378
379
380
381
   <!-- Doubly underlined text is wrapped in <div class="doubleunderline">. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:doubleunderline']">
      <div class="doubleunderline">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
382
   <!-- Superscripted text uses the HTML element <sup> instead of a <div> with a class. -->
   <xsl:template match="hi[@rendition='simple:superscript']" mode="html_for_whole_article">
      <sup>
         <xsl:apply-templates mode="html_for_whole_article" />
      </sup>
   </xsl:template>
   
388
389
390
391
392
393
   <!-- Subscripted text uses the HTML element <sub> instead of a <div> with a class. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:subscript']">
      <sub>
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </sub>
   </xsl:template>
   
394
395
396
397
398
399
   <!-- Italic text is wrapped in <div class="italic">. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:italic']">
      <div class="italic">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
400
401
402
403
404
405
406
407
408
409
410
411
   <!-- Letter-spaced text is wrapped in <div class="letterspace">. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:letterspace']">
      <div class="letterspace">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Text with the rendition 'simple:right' is wrapped in <div class="right">. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition='simple:right']">
      <div class="right">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
412
413
414
415
416
417
   <!-- References with a target become HTML links; the target is copied to href unchanged. -->
   <xsl:template mode="html_for_whole_article" match="ref[@target]">
      <a>
         <xsl:attribute name="href" select="@target" />
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </a>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
418
419
   <!-- Heads and labels with a rendition get two CSS classes: the element name and
        the part of @rendition after 'simple:'. The two-item sequence is joined
        with a space when it is written into the class attribute. -->
   <xsl:template match="head[@rendition] | label[@rendition]" mode="html_for_whole_article">
      <xsl:variable name="classNames" select="local-name(), substring-after(@rendition, 'simple:')" />
      <div class="{$classNames}">
         <xsl:apply-templates mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
425
</xsl:stylesheet>