gfl-indexer.xslt 13.9 KB
Newer Older
Dennis Neumann's avatar
Dennis Neumann committed
1
2
<?xml version="1.0" encoding="utf-8"?>

Dennis Neumann's avatar
Dennis Neumann committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
<!-- 

This script produces Solr XML documents.


Field 'fulltext_html'

This field contains the HTML representation of the text of a TEI document (e.g. a Goethe letter).
The Goethe letters are composed of different parts, for example 'opener', 'closer', 'salute'.
All those parts are represented here as <div>'s with the corresponding CSS classes.
The frontend viewer must decide how to format those parts and present them to the user.

Also, the original TEI files contain mark-up for many in-text parts, like dates, names, underlined words, etc.
Most of these are also transformed to <div>'s with their own CSS classes.
Although the in-text parts are inline elements by nature, we use <div>'s here and not <span>'s.
The reason is that Solr seems to have problems when highlighting fields that contain <span>'s:
it sometimes produces corrupt HTML.
By using <div>'s, we avoid this problem.
In the frontend, these <div>'s must therefore be set to 'display: inline'.

Some other in-text parts are transformed to special HTML elements.
For example, superscripted text is marked as <sup>, because HTML offers the appropriate element.

The project is still continuing and new TEI files are being produced.
That's why there might be new elements in the future that cannot be handled yet in this script.
The text of such TEI elements is enclosed in HTML elements of class 'unknown-element'.
Furthermore, a warning message is generated that contains data of the first occurrence of such a new element.






 -->

Dennis Neumann's avatar
Dennis Neumann committed
38
<xsl:stylesheet version="2.0"
   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
   xmlns:xs="http://www.w3.org/2001/XMLSchema"
   xmlns:saxon="http://saxon.sf.net/"
   xmlns:gfl="http://sub.gfl.de"
   xpath-default-namespace="http://www.tei-c.org/ns/1.0"
   exclude-result-prefixes="gfl saxon xs">

   <!-- Serialise as indented XML; the Saxon extension attribute switches
        indentation off for <div> elements. -->
   <xsl:output method="xml" indent="yes" saxon:suppress-indentation="div" />

   <!-- Whitespace-only text nodes of the source are stripped everywhere
        except in msIdentifier, bibl and p. -->
   <xsl:strip-space elements="*" />
   <xsl:preserve-space elements="msIdentifier bibl p" />
45
46
47
48

   <!-- Entry point: wraps everything produced for the TEI root element in
        the <add>/<doc> envelope of a Solr XML document. -->
   <xsl:template match="/">
      <add>
         <doc>
            <xsl:apply-templates select="child::TEI" />
         </doc>
      </add>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
53

Dennis Neumann's avatar
Dennis Neumann committed
54
55
56
57
   <!-- Hands the header and the text part of a TEI document on to their own
        templates (a union, so they are processed in document order). -->
   <xsl:template match="TEI">
      <xsl:apply-templates select="child::teiHeader | child::text" />
   </xsl:template>

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
   <!-- Text nodes of the HTML representation: every run of whitespace is
        collapsed to a single blank. If the node ends with a soft hyphen
        (&#0173;), all soft hyphens of the node are turned into '-'. -->
   <xsl:template match="text()" mode="html_for_whole_article">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of select="
         if (ends-with(., '&#0173;'))
         then replace($collapsed, '&#0173;', '-')
         else $collapsed" />
   </xsl:template>
   
   <!-- Text nodes of the plain-text representation. Whitespace runs are
        collapsed to a single blank, then the end of the node decides:
        * trailing '-': a word divided between two lines; the minus sign is
          dropped here, the line break (<lb/>) is dropped by its own template.
        * trailing soft hyphen (&#0173;): by convention a hyphen that belongs
          to the word or stands on its own; for now all soft hyphens of the
          node are simply replaced by '-'. Later it might be useful to tell
          word divisions and real hyphens apart.
        * anything else: the collapsed text is emitted unchanged. -->
   <xsl:template match="text()" mode="text_only">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of select="
         if (ends-with(., '-'))
         then substring($collapsed, 1, string-length($collapsed) - 1)
         else if (ends-with(., '&#0173;'))
         then replace($collapsed, '&#0173;', '-')
         else $collapsed" />
   </xsl:template>
   
   <!--###########   Header   #######################-->
   
   <!-- Header: only the file description and the text classification are
        passed on for indexing. -->
   <xsl:template match="teiHeader">
      <xsl:apply-templates select="child::fileDesc | child::profileDesc/child::textClass" />
   </xsl:template>
   
   <!-- File description: selects, in this fixed order, all titles, the names
        and the 'orn' date inside the descriptive title, the author names and
        the source description. The comma operator keeps the listed order, so
        the result is the same as five consecutive apply-templates calls. -->
   <xsl:template match="fileDesc">
      <xsl:apply-templates select="
         titleStmt/title,
         titleStmt/title[@type = 'desc']/name,
         titleStmt/title[@type = 'desc']/date[@type = 'orn'],
         titleStmt/author/name,
         sourceDesc" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
104
105
106
107
   <!-- Text classification: forwards every keyword term to the keyword
        templates. -->
   <xsl:template match="profileDesc/textClass">
      <xsl:apply-templates select="child::keywords/child::term" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
108
   <!-- Solr field 'short_title': plain text of the short title. -->
   <xsl:template match="title[@type = 'short']">
      <field name="short_title">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
114
   <!-- Solr field 'title': plain text of the descriptive title. -->
   <xsl:template match="title[@type = 'desc']">
      <field name="title">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
120
   <!-- Solr field 'origin_place': plain text of a place name with subtype
        'orn' inside a title. -->
   <xsl:template match="title/name[@type = 'place' and @subtype = 'orn']">
      <field name="origin_place">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
126
   <!-- Solr field 'destination_place': plain text of a place name with
        subtype 'dtn' inside a title. -->
   <xsl:template match="title/name[@type = 'place' and @subtype = 'dtn']">
      <field name="destination_place">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
132
   <!-- Solr field 'recipient': plain text of a person name with subtype
        'rcp' inside a title. -->
   <xsl:template match="title/name[@type = 'person' and @subtype = 'rcp']">
      <field name="recipient">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
138
   <!-- Solr field 'origin_date': the value of the @when attribute of an
        'orn' date inside a title (the element's text is not used). -->
   <xsl:template match="title/date[@type = 'orn']">
      <field name="origin_date">
         <xsl:value-of select="string(@when)" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
144
   <!-- Solr field 'author': plain text of a person name with subtype 'aut'
        inside an author element. -->
   <xsl:template match="author/name[@type = 'person' and @subtype = 'aut']">
      <field name="author">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
149
150
151
152
153
154
   
   <!-- Solr field 'source_description': plain text of the whole source
        description. -->
   <xsl:template match="sourceDesc">
      <field name="source_description">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
155

Dennis Neumann's avatar
Dennis Neumann committed
156
157
158
159
160
161
162
163
164
165
166
167
168
   <!-- Solr field 'gnd_keyword': one field per term of a keyword list whose
        scheme is '#gnd'. -->
   <xsl:template match="textClass/keywords[@scheme = '#gnd']/term">
      <field name="gnd_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

   <!-- Solr field 'free_keyword': one field per term of a keyword list whose
        scheme is 'free'. -->
   <xsl:template match="textClass/keywords[@scheme = 'free']/term">
      <field name="free_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>


169

Dennis Neumann's avatar
Dennis Neumann committed
170
171
172
173
174
175
176
   <!--###################   text/body   ##########################-->

   <!-- Text part of the document. Produces, in this order:
        * 'id'            - the xml:id of the text element,
        * 'fulltext'      - the body as plain text,
        * 'fulltext_html' - the HTML representation of all children, wrapped
                            in <div class="article">. The surrounding CDATA
                            markers are written with output escaping
                            disabled, so that they arrive literally in the
                            serialised result,
        * one 'note_comment' field for every descendant note of type 'com'. -->
   <xsl:template match="text">
      <field name="id">
         <xsl:value-of select="string(@xml:id)" />
      </field>

      <field name="fulltext">
         <xsl:apply-templates select="child::body" mode="text_only" />
      </field>

      <field name="fulltext_html">
         <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
         <div class="article">
            <xsl:apply-templates select="node()" mode="html_for_whole_article" />
         </div>
         <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
      </field>

      <xsl:apply-templates select="descendant::note[@type = 'com']" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
188
189
   
   <!-- Plain text: body and div contribute nothing themselves, only their
        content is processed. -->
   <xsl:template match="div | body" mode="text_only">
      <xsl:apply-templates select="node()" mode="text_only" />
   </xsl:template>

   <!-- Plain text: the content of a paragraph followed by one blank. -->
   <xsl:template match="p" mode="text_only">
      <xsl:apply-templates select="node()" mode="text_only" />
      <xsl:value-of select="' '" />
   </xsl:template>
197

198
   <!-- Plain text: a line break becomes one blank, unless the nearest
        preceding sibling text node shows that the break sits inside a word:
        * it ends with '-': a word divided between two lines;
        * it ends with a soft hyphen (&#0173;) that is not preceded by a
          blank: the hyphen belongs to the word (Anna-<lb/>Lena).
        In both cases nothing is written. -->
   <xsl:template match="lb" mode="text_only">
      <xsl:variable name="textBefore" select="preceding-sibling::text()[1]" />
      <xsl:variable name="dividesWord" select="ends-with($textBefore, '-')" />
      <xsl:variable name="followsWordHyphen"
         select="ends-with($textBefore, '&#0173;') and not(ends-with($textBefore, ' &#0173;'))" />
      <xsl:if test="not($dividesWord or $followsWordHyphen)">
         <xsl:text> </xsl:text>
      </xsl:if>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
214

215
216
217
218
219
220
221
222
   <!-- Plain text: a page break is written as one blank. -->
   <xsl:template match="pb" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- Plain text: a space element is written as one blank. -->
   <xsl:template match="space" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

223
   <!-- Plain text: notes of type 'com' are left out of the text; the empty
        template body suppresses them. -->
   <xsl:template match="note[@type = 'com']" mode="text_only" />
Dennis Neumann's avatar
Dennis Neumann committed
225
   
Dennis Neumann's avatar
Dennis Neumann committed
226
227
228
229
230
231
   <!-- Solr field 'note_comment': plain text of a note of type 'com'. -->
   <xsl:template match="note[@type = 'com']">
      <field name="note_comment">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
232
233
   
   <!-- ++++++++++++ HTML +++++++++++++++++++ -->
234
235
236
237
238
239

   <!-- Fallback for every element that has no HTML template of its own.
        Elements without any descendant text are dropped silently. For all
        others a message naming the element (plus its @rendition and @type,
        if present) is issued and the content is wrapped in an element of
        class 'unknown-element'.

        The wrapper is a <div>, not a <span>: this markup ends up in the
        highlighted Solr field 'fulltext_html', and this stylesheet uses
        <div>'s for all inline parts because <span>'s can make Solr's
        highlighter produce corrupt HTML. The frontend has to render the
        <div> inline, like the other in-text <div>'s.

        NOTE(review): the message text ends with 'first occurrence: ' and
        nothing is appended here; presumably the calling code adds the
        location. TODO confirm. The text is deliberately left unchanged. -->
   <xsl:template match="*" mode="html_for_whole_article">
      <xsl:if test=".//text()">
         <xsl:message>
            <xsl:text>Unknown element &lt;</xsl:text>
            <xsl:value-of select="local-name()" />
            <xsl:if test="@rendition">
               <xsl:text> rendition="</xsl:text>
               <xsl:value-of select="@rendition" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:if test="@type">
               <xsl:text> type="</xsl:text>
               <xsl:value-of select="@type" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:text>&gt; - first occurrence: </xsl:text>
         </xsl:message>
         <div class="unknown-element">
            <xsl:apply-templates mode="html_for_whole_article" />
         </div>
      </xsl:if>
   </xsl:template>
   
   <!-- HTML: body and div produce no markup of their own, only their
        content is processed. -->
   <xsl:template match="div | body" mode="html_for_whole_article">
      <xsl:apply-templates select="node()" mode="html_for_whole_article" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
261
   
262
   <!-- HTML: each of the listed elements becomes a <div> whose CSS class is
        the element's own local name. A label is only handled here when it
        has no @rendition. -->
   <xsl:template mode="html_for_whole_article"
      match="p | opener | closer | salute | signed | dateline | date | postscript
         | seg | bibl | choice | abbr | expan | label[not(@rendition)]">
      <div class="{local-name()}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>

   <!-- HTML: a line break is rendered as an empty <br> element. -->
   <xsl:template match="lb" mode="html_for_whole_article">
      <br />
   </xsl:template>
272

273
274
   <!-- HTML: vertical space measured in lines. Writes one <br> per line
        when @quantity is an integer, and exactly one <br> when @quantity is
        missing or not an integer. -->
   <xsl:template match="space[@unit = 'lines']" mode="html_for_whole_article">
      <xsl:variable name="lineCount"
         select="if (@quantity castable as xs:integer) then xs:integer(@quantity) else 1" />
      <xsl:for-each select="1 to $lineCount">
         <br />
      </xsl:for-each>
   </xsl:template>

   <!-- HTML: a place name becomes <div class="place">. -->
   <xsl:template match="name[@type = 'place']" mode="html_for_whole_article">
      <div class="place">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
293
294
295
296
297
298
299
300
301
302
303
304
   <!-- HTML: an organisation name becomes <div class="org">. -->
   <xsl:template match="name[@type = 'org']" mode="html_for_whole_article">
      <div class="org">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML: a person name becomes <div class="person">. -->
   <xsl:template match="name[@type = 'person']" mode="html_for_whole_article">
      <div class="person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
305
306
307
308
309
310
   <!-- HTML: an object name becomes <div class="object">. -->
   <xsl:template match="name[@type = 'object']" mode="html_for_whole_article">
      <div class="object">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
311
312
   <!-- HTML: a page break becomes <div class="page-break">. When @n is
        present and not empty, the div holds a link labelled with @n that
        opens in a new window; otherwise it holds a single blank.

        NOTE(review): the link target is a fixed path, so every page break
        links to the same image. The two variables below resolve the element
        referenced by @facs (leading character removed) and read its @url,
        but the result is never used, and a test on it is only present as a
        disabled xsl:if. This looks like a placeholder; whether the href
        should be the resolved URL itself or a path built from it cannot be
        told from this file. TODO confirm and wire it up. -->
   <xsl:template match="pb" mode="html_for_whole_article">
      <div class="page-break">
         <xsl:variable name="facsimileId" select="substring(@facs, 2, string-length(@facs))" />
         <xsl:variable name="facsimileUrl" select="id($facsimileId)/@url" />

         <!-- disabled: xsl:if test="$facsimileUrl" -->
         <xsl:choose>
            <xsl:when test="not(@n ne '')">
               <xsl:text> </xsl:text>
            </xsl:when>
            <xsl:otherwise>
               <a href="/image/EPN_230345212_0010-0" target="_blank">
                  <xsl:value-of select="@n" />
               </a>
            </xsl:otherwise>
         </xsl:choose>
         <!-- disabled: /xsl:if -->
      </div>
   </xsl:template>
   
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
   <!-- HTML: an rs element of type 'person' becomes
        <div class="rs-person">. -->
   <xsl:template match="rs[@type = 'person']" mode="html_for_whole_article">
      <div class="rs-person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML: a note of type 'com' becomes <div class="note-comment">. -->
   <xsl:template match="note[@type = 'com']" mode="html_for_whole_article">
      <div class="note-comment">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML: underlined text becomes <div class="underline">. -->
   <xsl:template match="hi[@rendition = 'simple:underline']" mode="html_for_whole_article">
      <div class="underline">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
349
350
351
352
353
354
   <!-- HTML: doubly underlined text becomes
        <div class="doubleunderline">. -->
   <xsl:template match="hi[@rendition = 'simple:doubleunderline']" mode="html_for_whole_article">
      <div class="doubleunderline">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
355
   <!-- HTML: superscripted text uses the dedicated <sup> element instead of
        a <div> with a class. -->
   <xsl:template match="hi[@rendition = 'simple:superscript']" mode="html_for_whole_article">
      <sup>
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </sup>
   </xsl:template>
   
361
362
363
364
365
366
   <!-- HTML: subscripted text uses the dedicated <sub> element instead of a
        <div> with a class. -->
   <xsl:template match="hi[@rendition = 'simple:subscript']" mode="html_for_whole_article">
      <sub>
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </sub>
   </xsl:template>
   
367
368
369
370
371
372
   <!-- HTML: italic text becomes <div class="italic">. -->
   <xsl:template match="hi[@rendition = 'simple:italic']" mode="html_for_whole_article">
      <div class="italic">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
373
374
375
376
377
378
379
380
381
382
383
384
   <!-- HTML: letter-spaced text becomes <div class="letterspace">. -->
   <xsl:template match="hi[@rendition = 'simple:letterspace']" mode="html_for_whole_article">
      <div class="letterspace">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML: text with rendition 'simple:right' becomes
        <div class="right">. -->
   <xsl:template match="hi[@rendition = 'simple:right']" mode="html_for_whole_article">
      <div class="right">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
385
386
387
388
389
390
   <!-- HTML: a reference with a target becomes a link; @target is copied
        into the href unchanged. -->
   <xsl:template match="ref[@target]" mode="html_for_whole_article">
      <a href="{@target}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </a>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
391
392
   <!-- HTML: a head or label that carries @rendition becomes a <div> with
        two CSS classes separated by a blank: the element's local name and
        whatever follows 'simple:' in @rendition. -->
   <xsl:template match="label[@rendition] | head[@rendition]" mode="html_for_whole_article">
      <div class="{local-name()} {substring-after(@rendition, 'simple:')}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
398
</xsl:stylesheet>