<?xml version="1.0" encoding="utf-8"?>

Dennis Neumann's avatar
Dennis Neumann committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
<!-- 

This script produces Solr XML documents.


Field 'fulltext_html'

This field contains the HTML representation of the text of a TEI document (e.g. a Goethe letter).
The Goethe letters are composed of different parts, for example 'opener', 'closer', 'salute'.
All those parts are represented here as <div>'s with the corresponding CSS classes.
The frontend viewer must decide how to format those parts and present them to the user.

Also, the original TEI files contain mark-up for many in-text parts, like dates, names, underlined words, etc.
Most of these are also transformed to <div>'s with their own CSS classes.
Although the in-text parts are by nature inline elements, we use <div>'s here rather than <span>'s.
The reason is that Solr seems to have problems when highlighting fields that contain <span>'s:
it sometimes produces corrupt HTML.
By using <div>'s, we avoid this problem.
In the frontend, these <div>'s must be set to 'display: inline'.

Some other in-text parts are transformed to special HTML elements.
For example, superscripted text is marked as <sup>, because HTML offers the appropriate element.

The project is still continuing and new TEI files are being produced.
That's why there might be new elements in the future that cannot be handled yet in this script.
The text of such TEI elements is enclosed in HTML elements of class 'unknown-element'.
Furthermore, a warning message is generated that contains data of the first occurrence of such a new element.






 -->

Dennis Neumann's avatar
Dennis Neumann committed
38
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
39
40
41
   xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:gfl="http://sub.gfl.de"
   xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:saxon="http://saxon.sf.net/" exclude-result-prefixes="gfl saxon xs">

Dennis Neumann's avatar
Dennis Neumann committed
42
   <!-- Serialisation settings: indented XML output; the Saxon extension attribute
        names 'div' as an element for which indentation is suppressed. -->
   <xsl:output indent="yes" method="xml" saxon:suppress-indentation="div" />

   <!-- Whitespace-only text nodes of the input are stripped in all elements,
        except in msIdentifier, bibl and p, where they are preserved. -->
   <xsl:strip-space elements="*" />
   <xsl:preserve-space elements="msIdentifier bibl p" />
45
46
47
48

   <!-- Entry point: wraps everything produced for the TEI root element
        in a single <add>/<doc> envelope. -->
   <xsl:template match="/">
      <add>
         <doc>
            <xsl:apply-templates select="child::TEI" />
         </doc>
      </add>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
53

Dennis Neumann's avatar
Dennis Neumann committed
54
55
56
57
   <!-- TEI root: only the header and the text are processed.
        The union is evaluated in document order. -->
   <xsl:template match="TEI">
      <xsl:apply-templates select="text | teiHeader" />
   </xsl:template>

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
   <!-- HTML mode, text nodes: every run of whitespace is collapsed to one space.
        If the node ends with a soft hyphen (U+00AD), all soft hyphens of the
        node are written as an ordinary '-'. -->
   <xsl:template match="text()" mode="html_for_whole_article">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of select="if (ends-with(., '&#0173;'))
                            then replace($collapsed, '&#0173;', '-')
                            else $collapsed" />
   </xsl:template>
   
   <!-- Plain-text mode, text nodes: whitespace runs are collapsed to one space, then
        * a trailing '-' is cut off (the last character is removed),
        * otherwise, if the node ends with a soft hyphen (U+00AD), all soft hyphens
          of the node are written as '-',
        * otherwise the collapsed text is written unchanged. -->
   <xsl:template match="text()" mode="text_only">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of select="if (ends-with(., '-'))
                            then substring($collapsed, 1, string-length($collapsed) - 1)
                            else if (ends-with(., '&#0173;'))
                            then replace($collapsed, '&#0173;', '-')
                            else $collapsed" />
   </xsl:template>
   
   <!--###########   Header   #######################-->
   
   <!-- Header: hands over to the file description and to the text classification
        (keywords); nothing else of the header is processed here. -->
   <xsl:template match="teiHeader">
      <xsl:apply-templates select="profileDesc/textClass | fileDesc" />
   </xsl:template>
   
   <!-- File description: processes, in this order, the titles, the names and the
        origin date inside the 'desc' title, the author names and finally the
        source description. The order of the instructions fixes the field order. -->
   <xsl:template match="fileDesc">
      <xsl:variable name="descTitle" select="titleStmt/title[@type='desc']" />
      <xsl:apply-templates select="titleStmt/title" />
      <xsl:apply-templates select="$descTitle/name" />
      <xsl:apply-templates select="$descTitle/date[@type='orn']" />
      <xsl:apply-templates select="titleStmt/author/name" />
      <xsl:apply-templates select="sourceDesc" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
99
100
101
102
   <!-- Text classification: only the keyword terms are processed. -->
   <xsl:template match="profileDesc/textClass">
      <xsl:apply-templates select="child::keywords/child::term" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
103
   <!-- Short title: its content, rendered in plain-text mode, becomes 'short_title'. -->
   <xsl:template match="title[@type='short']">
      <field name="short_title">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
109
   <!-- Descriptive title: its content, rendered in plain-text mode, becomes 'title'. -->
   <xsl:template match="title[@type='desc']">
      <field name="title">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
115
   <!-- Place name with subtype 'orn' inside a title: written to 'origin_place'. -->
   <xsl:template match="title/name[@type='place' and @subtype='orn']">
      <field name="origin_place">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
121
   <!-- Place name with subtype 'dtn' inside a title: written to 'destination_place'. -->
   <xsl:template match="title/name[@type='place' and @subtype='dtn']">
      <field name="destination_place">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
127
   <!-- Person name with subtype 'rcp' inside a title: written to 'recipient'. -->
   <xsl:template match="title/name[@type='person' and @subtype='rcp']">
      <field name="recipient">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
133
   <!-- Date of type 'orn' inside a title: the value of its 'when' attribute is
        written to 'origin_date'.
        NOTE(review): without a 'when' attribute the field is written empty;
        confirm that the Solr schema accepts that. -->
   <xsl:template match="title/date[@type='orn']">
      <field name="origin_date">
         <xsl:value-of select="string(@when)" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
139
   <!-- Person name with subtype 'aut' inside the author element: written to 'author'. -->
   <xsl:template match="author/name[@type='person' and @subtype='aut']">
      <field name="author">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
144
145
146
147
148
149
   
   <!-- Source description: its whole content, rendered in plain-text mode,
        becomes 'source_description'. -->
   <xsl:template match="sourceDesc">
      <field name="source_description">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
150

Dennis Neumann's avatar
Dennis Neumann committed
151
152
153
154
155
156
157
158
159
160
161
162
163
   <!-- Keyword term of the '#gnd' scheme: one 'gnd_keyword' field per term. -->
   <xsl:template match="textClass/keywords[@scheme='#gnd']/term">
      <field name="gnd_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

   <!-- Keyword term of the 'free' scheme: one 'free_keyword' field per term. -->
   <xsl:template match="textClass/keywords[@scheme='free']/term">
      <field name="free_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>


164

Dennis Neumann's avatar
Dennis Neumann committed
165
166
167
168
169
170
171
   <!--###################   text/body   ##########################-->

   <!-- TEI text element. Produces three fields:
        * 'id'            from the xml:id attribute,
        * 'fulltext'      the body rendered in plain-text mode,
        * 'fulltext_html' all children rendered in HTML mode, wrapped in
                          <div class="article"> and enclosed in a CDATA section.
        The CDATA delimiters are written as unescaped text, so the HTML mark-up
        ends up as character data of the field. -->
   <xsl:template match="text">
      <field name="id">
         <xsl:value-of select="string(@xml:id)" />
      </field>
      <field name="fulltext">
         <xsl:apply-templates select="child::body" mode="text_only" />
      </field>
      <field name="fulltext_html">
         <!-- opening CDATA delimiter, must not be escaped by the serialiser -->
         <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
         <div class="article">
            <xsl:apply-templates select="node()" mode="html_for_whole_article" />
         </div>
         <!-- closing CDATA delimiter -->
         <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
182
183
   
   <!-- Plain-text mode: body and div are pure containers, only their children
        contribute output. -->
   <xsl:template match="div | body" mode="text_only">
      <xsl:apply-templates select="node()" mode="text_only" />
   </xsl:template>

   <!-- Plain-text mode: a paragraph is followed by one space, which separates
        it from whatever text comes next. -->
   <xsl:template match="p" mode="text_only">
      <xsl:apply-templates select="node()" mode="text_only" />
      <xsl:value-of select="' '" />
   </xsl:template>
191

192
   <!-- Plain-text mode, line break: normally replaced by one space. No space is
        written when the nearest preceding sibling text node
        * ends with '-' (a word is divided between two lines), or
        * ends with a soft hyphen (U+00AD) that is not preceded by a space
          (the hyphen belongs to the word, e.g. Anna-<lb/>Lena).
        NOTE(review): preceding-sibling::text()[1] is the nearest preceding text
        sibling, which is not necessarily the node directly before the <lb/>. -->
   <xsl:template match="lb" mode="text_only">
      <xsl:variable name="textBefore" select="preceding-sibling::text()[1]" />
      <xsl:variable name="wordContinues"
         select="ends-with($textBefore, '-')
                 or (ends-with($textBefore, '&#0173;') and not(ends-with($textBefore, ' &#0173;')))" />
      <xsl:if test="not($wordContinues)">
         <xsl:text> </xsl:text>
      </xsl:if>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
208

209
210
211
212
213
214
215
216
   <!-- Plain-text mode: a page break is replaced by one space. -->
   <xsl:template match="pb" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- Plain-text mode: a space element is replaced by one space. -->
   <xsl:template match="space" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

217
   <!-- Plain-text mode: notes of type 'com' produce no output at all;
        the template is empty on purpose. -->
   <xsl:template match="note[@type='com']" mode="text_only" />
Dennis Neumann's avatar
Dennis Neumann committed
219
220
221
   
   
   <!-- ++++++++++++ HTML +++++++++++++++++++ -->
222
223
224
225
226
227

   <!-- HTML mode, fallback for every element without a more specific template.
        Elements that contain no text at all are dropped silently. For the others
        a message naming the element (plus its rendition and type attributes, if
        present) is emitted, and the content is wrapped in an element of class
        'unknown-element'.
        NOTE(review): the message stops after the label 'first occurrence: ';
        presumably the caller appends the location. TODO confirm.
        NOTE(review): a <span> is used here although the other inline parts are
        rendered as <div>; verify that this is intended. -->
   <xsl:template match="*" mode="html_for_whole_article">
      <xsl:if test=".//text()">
         <xsl:message>
            <xsl:value-of select="concat(
               'Unknown element &lt;',
               local-name(),
               if (@rendition) then concat(' rendition=&quot;', @rendition, '&quot;') else '',
               if (@type) then concat(' type=&quot;', @type, '&quot;') else '',
               '&gt; - first occurrence: ')" />
         </xsl:message>
         <span class="unknown-element">
            <xsl:apply-templates select="node()" mode="html_for_whole_article" />
         </span>
      </xsl:if>
   </xsl:template>
   
   <!-- HTML mode: body and div get no wrapper of their own, only their
        children are rendered. -->
   <xsl:template match="div | body" mode="html_for_whole_article">
      <xsl:apply-templates select="node()" mode="html_for_whole_article" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
249
   
Dennis Neumann's avatar
Dennis Neumann committed
250
251
   <!-- HTML mode: each of the listed TEI elements becomes a <div> whose CSS class
        is the local name of the element (e.g. 'opener', 'closer'). -->
   <xsl:template match="opener | closer | salute | signed | dateline | date | seg | bibl"
      mode="html_for_whole_article">
      <div>
         <xsl:attribute name="class" select="local-name()" />
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>

   <!-- HTML mode: a TEI line break becomes an HTML line break. -->
   <xsl:template match="lb" mode="html_for_whole_article">
      <br/>
   </xsl:template>
259

260
261
   <!-- HTML mode, vertical space measured in lines: writes one <br/> per line
        given in the 'quantity' attribute. If that attribute is missing or is not
        castable to an integer, exactly one <br/> is written. A quantity below 1
        yields no <br/>, because the range '1 to n' is then empty. -->
   <xsl:template match="space[@unit='lines']" mode="html_for_whole_article">
      <xsl:variable name="lineCount"
         select="if (@quantity castable as xs:integer) then xs:integer(@quantity) else 1" />
      <xsl:for-each select="1 to $lineCount">
         <br/>
      </xsl:for-each>
   </xsl:template>

   <!-- HTML mode: a TEI paragraph is rendered as an <article> element. -->
   <xsl:template match="p" mode="html_for_whole_article">
      <article>
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </article>
   </xsl:template>

   <!-- HTML mode: place names become <div class="place">. -->
   <xsl:template match="name[@type='place']" mode="html_for_whole_article">
      <div class="place">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
286
287
288
289
290
291
292
293
294
295
296
297
   <!-- HTML mode: organisation names become <div class="org">. -->
   <xsl:template match="name[@type='org']" mode="html_for_whole_article">
      <div class="org">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML mode: person names become <div class="person">. -->
   <xsl:template match="name[@type='person']" mode="html_for_whole_article">
      <div class="person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
298
299
300
301
302
303
   <!-- HTML mode: object names become <div class="object">. -->
   <xsl:template match="name[@type='object']" mode="html_for_whole_article">
      <div class="object">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
304
305
   <!-- HTML mode: a page break becomes <div class="page-break"> containing the
        value of the 'n' attribute (empty if the attribute is absent). -->
   <xsl:template match="pb" mode="html_for_whole_article">
      <div class="page-break">
         <xsl:value-of select="string(@n)" />
      </div>
   </xsl:template>
   
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
   <!-- HTML mode: rs elements of type 'person' become <div class="rs-person">. -->
   <xsl:template match="rs[@type='person']" mode="html_for_whole_article">
      <div class="rs-person">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML mode: notes of type 'com' are kept (unlike in plain-text mode, where
        they are dropped) and become <div class="note-comment">. -->
   <xsl:template match="note[@type='com']" mode="html_for_whole_article">
      <div class="note-comment">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML mode: underlined text becomes <div class="underline">. -->
   <xsl:template match="hi[@rendition='simple:underline']" mode="html_for_whole_article">
      <div class="underline">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML mode: superscripted text uses the dedicated HTML element <sup>
        instead of a <div> with a CSS class. -->
   <xsl:template match="hi[@rendition='simple:superscript']" mode="html_for_whole_article">
      <sup>
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </sup>
   </xsl:template>
   
   <!-- HTML mode: italic text becomes <div class="italic">. -->
   <xsl:template match="hi[@rendition='simple:italic']" mode="html_for_whole_article">
      <div class="italic">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- HTML mode: a reference with a target becomes a hyperlink whose href is
        the unchanged value of the 'target' attribute. -->
   <xsl:template match="ref[@target]" mode="html_for_whole_article">
      <a>
         <xsl:attribute name="href" select="@target" />
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </a>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
346
</xsl:stylesheet>