<?xml version="1.0" encoding="utf-8"?>

<!-- 

This script produces Solr XML documents.


Field 'fulltext_html'

This field contains the HTML representation of the text of a TEI document (e.g. a Goethe letter).
The Goethe letters are composed of different parts, for example 'opener', 'closer', 'salute'.
All those parts are represented here as <div>'s with the corresponding CSS classes.
The frontend viewer must decide how to format those parts and present them to the user.

Also, the original TEI files contain mark-up for many in-text parts, like dates, names, underlined words, etc.
Most of these are also transformed to <div>'s with their own CSS classes.
Although the in-text parts are by nature inline elements, we use <div>'s here and not <span>'s.
The reason is that Solr seems to have problems when highlighting fields that contain <span>'s
by sometimes producing corrupt HTML.
By using <div>'s, we avoid this problem.
In the frontend, these <div>'s must be set to 'display: inline'.

Some other in-text parts are transformed to special HTML elements.
For example, superscripted text is marked as <sup>, because HTML offers the appropriate element.

The project is still continuing and new TEI files are being produced.
That's why there might be new elements in the future that cannot be handled yet in this script.
The text of such TEI elements is enclosed in HTML elements of class 'unknown-element'.
Furthermore, a warning message is generated that contains data of the first occurrence of such a new element.






 -->

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
   xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:gfl="http://sub.gfl.de"
   xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:saxon="http://saxon.sf.net/" exclude-result-prefixes="gfl saxon xs">

   <!-- Because of xpath-default-namespace, unprefixed element names in match patterns
        and XPath expressions refer to elements in the TEI namespace. -->

   <!-- The result is serialized as indented XML. The Saxon extension attribute lists
        'div' as an element whose content is not to be re-indented. -->
   <xsl:output method="xml" indent="yes" saxon:suppress-indentation="div" />

   <!-- Whitespace-only text nodes of the input are dropped everywhere,
        except inside msIdentifier, bibl and p. -->
   <xsl:strip-space elements="*" />
   <xsl:preserve-space elements="msIdentifier bibl p" />
   <!-- Entry point: wraps everything produced for the TEI root element
        in a single add/doc envelope. -->
   <xsl:template match="/">
      <add>
         <doc>
            <xsl:apply-templates select="TEI" />
         </doc>
      </add>
   </xsl:template>
   <!-- Processes the header and the text part of the TEI document, in document order.
        Other children of TEI are not processed. -->
   <xsl:template match="TEI">
      <xsl:apply-templates select="*[self::teiHeader or self::text]" />
   </xsl:template>

   <!-- Text nodes in the HTML view: every run of whitespace is collapsed to one blank.
        If the original text node ends with a soft hyphen, all soft hyphens
        in the node are replaced by a minus sign. -->
   <xsl:template mode="html_for_whole_article" match="text()">
      <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
      <xsl:value-of
         select="if (ends-with(., '&#0173;')) then replace($collapsed, '&#0173;', '-') else $collapsed" />
   </xsl:template>
   
   <!-- Text nodes in the plain-text view (used for the 'fulltext' field and the header fields).
        Every run of whitespace is collapsed to one blank; a trailing minus sign or
        soft hyphen is then handled as described in the branches below.
        Stray non-XSLT text that had ended up inside the branches was removed, because
        literal text inside a template body is copied to the output. -->
   <xsl:template match="text()" mode="text_only">
      <xsl:variable name="currentText" select="replace(., '\s+', ' ')" />
      <xsl:choose>
         <xsl:when test="ends-with(., '-')">
            <!-- These are cases where one word is divided between two lines.
            The minus sign is removed here, the line break (<lb/>) is removed in its own template. -->
            <xsl:value-of select="substring($currentText, 1, string-length($currentText) - 1)" />
         </xsl:when>
         <xsl:when test="ends-with(., '&#0173;')">
            <!-- A soft hyphen is a convention to mark a hyphen that belongs to the word or is a hyphen on its own.
            For now, it is just replaced by a minus sign.
            Later, it might be useful to differentiate between word divisions and hyphens. -->
            <xsl:value-of select="replace($currentText, '&#0173;', '-')" />
         </xsl:when>
         <xsl:otherwise>
            <xsl:value-of select="$currentText" />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>
   
   <!--###########   Header   #######################-->
   
   <!-- Only the file description and the text classification of the header are indexed. -->
   <xsl:template match="teiHeader">
      <xsl:apply-templates select="fileDesc | profileDesc/textClass" />
   </xsl:template>
   
   <!-- Produces the header fields: titles, the names and the origin date inside the
        descriptive title, the author name and the source description.
        NOTE(review): selected elements for which this stylesheet has no matching template
        (e.g. a title with another @type) would presumably be handled by the built-in
        template rules; confirm that such elements do not occur in the data. -->
   <xsl:template match="fileDesc">
      <xsl:apply-templates select="titleStmt/title" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/name" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/date[@type='orn']" />
      <xsl:apply-templates select="titleStmt/author/name" />
      <xsl:apply-templates select="sourceDesc" />
   </xsl:template>
   
   <!-- Hands every keyword term of the text classification to the term templates. -->
   <xsl:template match="profileDesc/textClass">
      <xsl:apply-templates select="child::keywords/child::term" />
   </xsl:template>
   
   <!-- Field 'short_title': plain text of the short title. -->
   <xsl:template match="title[@type='short']">
      <field name="short_title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'title': plain text of the descriptive title. -->
   <xsl:template match="title[@type='desc']">
      <field name="title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'origin_place': place name with subtype 'orn' inside a title. -->
   <xsl:template match="title/name[@type='place' and @subtype='orn']">
      <field name="origin_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'destination_place': place name with subtype 'dtn' inside a title. -->
   <xsl:template match="title/name[@type='place' and @subtype='dtn']">
      <field name="destination_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'recipient': person name with subtype 'rcp' inside a title. -->
   <xsl:template match="title/name[@type='person' and @subtype='rcp']">
      <field name="recipient">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'origin_date': taken from the @when attribute of the date, not from its text. -->
   <xsl:template match="title/date[@type='orn']">
      <field name="origin_date">
         <xsl:value-of select="@when" />
      </field>
   </xsl:template>

   <!-- Field 'author': person name with subtype 'aut' inside the author element. -->
   <xsl:template match="author/name[@type='person' and @subtype='aut']">
      <field name="author">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>
   <!-- Field 'source_description': plain text of everything inside sourceDesc. -->
   <xsl:template match="sourceDesc">
      <field name="source_description">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
   <!-- Field 'gnd_keyword': one field per term of the keyword list with scheme '#gnd'. -->
   <xsl:template match="textClass/keywords[@scheme = '#gnd']/term">
      <field name="gnd_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

   <!-- Field 'free_keyword': one field per term of the keyword list with scheme 'free'. -->
   <xsl:template match="textClass/keywords[@scheme = 'free']/term">
      <field name="free_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>


   <!--###################   text/body   ##########################-->

   <!-- Produces three fields for the TEI text element:
        'id'            - the value of @xml:id,
        'fulltext'      - the body rendered in text_only mode,
        'fulltext_html' - all children rendered in html_for_whole_article mode inside
                          <div class="article">.
        The CDATA start and end markers are written with disable-output-escaping so that
        they reach the output literally and enclose the serialized HTML; this depends on
        the serializer honouring disable-output-escaping.
        Stray non-XSLT text that had ended up inside the fields was removed, because
        literal text inside a template body is copied to the output. -->
   <xsl:template match="text">
      <field name="id">
         <xsl:value-of select="@xml:id" />
      </field>
      <field name="fulltext">
         <xsl:apply-templates select="body" mode="text_only" />
      </field>
      <field name="fulltext_html">
         <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
            <div class="article">
               <xsl:apply-templates mode="html_for_whole_article" />
            </div>
         <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
      </field>
   </xsl:template>
   <!-- body and div produce no output of their own in the plain-text view;
        only their children are processed. -->
   <xsl:template match="body | div" mode="text_only">
      <xsl:apply-templates mode="text_only"/>
   </xsl:template>

   <!-- A paragraph in the plain-text view: its content followed by one blank. -->
   <xsl:template mode="text_only" match="p">
      <xsl:apply-templates select="node()" mode="#current" />
      <xsl:value-of select="' '" />
   </xsl:template>
   <!-- A line break in the plain-text view becomes one blank, unless the nearest preceding
        sibling text node ends with a minus sign or with a soft hyphen that is not
        preceded by a blank; in those cases nothing is output, so the word parts are joined.
        NOTE(review): preceding-sibling::text()[1] is the nearest preceding text sibling,
        which is not necessarily the node directly before this lb (an element may lie in
        between). Confirm that this is intended. -->
   <xsl:template match="lb" mode="text_only">
      <xsl:variable name="precedingText" select="preceding-sibling::text()[1]" />
      <xsl:choose>
         <xsl:when test="ends-with($precedingText, '-')">
            <!-- Cases where a word is divided between two lines -->
            <!-- no output -->
         </xsl:when>
         <xsl:when test="ends-with($precedingText, '&#0173;') and not(ends-with($precedingText, ' &#0173;'))">
            <!-- Cases where the hyphen belongs to the word (Anna-<lb/>Lena) -->
            <!-- no output -->
         </xsl:when>
         <xsl:otherwise>
            <xsl:text> </xsl:text>
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>
   <!-- A page break is represented by one blank in the plain-text view. -->
   <xsl:template mode="text_only" match="pb">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- A space element is represented by one blank in the plain-text view. -->
   <xsl:template mode="text_only" match="space">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- Notes of type 'com' are left out of the plain-text view: the template is
        intentionally empty. A stray number that had ended up in the body was removed,
        because it would have been output for every such note. -->
   <xsl:template match="note[@type='com']" mode="text_only">
   </xsl:template>
   <!-- ++++++++++++ HTML +++++++++++++++++++ -->
   <!-- Fallback for every element that has no dedicated template in the HTML view.
        Elements without any descendant text produce nothing. For all others a message
        is emitted that names the element together with its @rendition and @type (if
        present), and the content is wrapped in an element of class 'unknown-element'.
        The wrapper is a div rather than a span, in line with the header note of this
        file that spans in this field lead to corrupt HTML from Solr highlighting; the
        frontend presumably has to style it inline like the other divs.
        NOTE(review): the message ends with 'first occurrence: ' and nothing after it;
        presumably the calling code appends the details. Confirm. -->
   <xsl:template match="*" mode="html_for_whole_article">
      <xsl:if test=".//text()">
         <xsl:message>
            <xsl:text>Unknown element &lt;</xsl:text>
            <xsl:value-of select="local-name()" />
            <xsl:if test="@rendition">
               <xsl:text> rendition="</xsl:text>
               <xsl:value-of select="@rendition" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:if test="@type">
               <xsl:text> type="</xsl:text>
               <xsl:value-of select="@type" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:text>&gt; - first occurrence: </xsl:text>
         </xsl:message>
         <div class="unknown-element">
            <xsl:apply-templates mode="html_for_whole_article" />
         </div>
      </xsl:if>
   </xsl:template>
   
   <!-- body and div add no markup of their own in the HTML view;
        only their children are processed. -->
   <xsl:template mode="html_for_whole_article" match="div | body">
      <xsl:apply-templates select="node()" mode="#current" />
   </xsl:template>
   <!-- Each of these elements becomes a div whose CSS class is the element's local name. -->
   <xsl:template match="opener | salute | seg | bibl | closer | signed | dateline | date" mode="html_for_whole_article">
      <div class="{local-name()}">
         <xsl:apply-templates mode="html_for_whole_article" />
      </div>
   </xsl:template>

   <!-- A line break becomes an HTML br element. -->
   <xsl:template mode="html_for_whole_article" match="lb">
      <xsl:element name="br" />
   </xsl:template>
   <!-- Vertical space measured in lines: one br per line given in @quantity.
        If @quantity is missing or is not castable to an integer, a single br is output. -->
   <xsl:template match="space[@unit='lines']" mode="html_for_whole_article">
      <xsl:variable name="emptyLines" select="@quantity" />
      <xsl:choose>
         <xsl:when test="$emptyLines castable as xs:integer">
            <xsl:for-each select="1 to $emptyLines">
               <br />
            </xsl:for-each>
         </xsl:when>
         <xsl:otherwise>
            <br />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>

   <!-- A paragraph is wrapped in an HTML article element. -->
   <xsl:template match="p" mode="html_for_whole_article">
      <article>
         <xsl:apply-templates mode="html_for_whole_article" />
      </article>
   </xsl:template>

   <!-- Place names are wrapped in a div of class 'place'. -->
   <xsl:template mode="html_for_whole_article" match="name[@type = 'place']">
      <div class="place">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Organisation names are wrapped in a div of class 'org'. -->
   <xsl:template mode="html_for_whole_article" match="name[@type = 'org']">
      <div class="org">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Person names are wrapped in a div of class 'person'. -->
   <xsl:template mode="html_for_whole_article" match="name[@type = 'person']">
      <div class="person">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Object names are wrapped in a div of class 'object'. -->
   <xsl:template mode="html_for_whole_article" match="name[@type = 'object']">
      <div class="object">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- A page break becomes a div of class 'page-break' that contains a link labelled
        with the page number (@n).
        $facsId is @facs without its first character; $graphicUrl is the @url of the
        element with that ID.
        TODO(review): both variables are currently unused. The link target is a fixed
        URL, so every page break points to the same image, and the test on $graphicUrl
        is commented out. The intended URL scheme is not visible here; replace the fixed
        href once it is known. -->
   <xsl:template match="pb" mode="html_for_whole_article">
      <div class="page-break">
         <xsl:variable name="facsId" select="substring(@facs, 2)" />
         <xsl:variable name="graphicUrl" select="id($facsId)/@url" />
         
         <!--xsl:if test="$graphicUrl"-->
         <a href="/image/EPN_230345212_0010-0" target="_blank">
            <xsl:value-of select="@n" />
         </a>
         <!--/xsl:if-->
      </div>
   </xsl:template>
   
   <!-- rs elements of type 'person' are wrapped in a div of class 'rs-person'. -->
   <xsl:template mode="html_for_whole_article" match="rs[@type = 'person']">
      <div class="rs-person">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Notes of type 'com' are kept in the HTML view, wrapped in a div of class 'note-comment'. -->
   <xsl:template mode="html_for_whole_article" match="note[@type = 'com']">
      <div class="note-comment">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Underlined text is wrapped in a div of class 'underline'. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition = 'simple:underline']">
      <div class="underline">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- Superscripted text is output as an HTML sup element. -->
   <xsl:template match="hi[@rendition='simple:superscript']" mode="html_for_whole_article">
      <sup>
         <xsl:apply-templates mode="html_for_whole_article" />
      </sup>
   </xsl:template>
   
   <!-- Italic text is wrapped in a div of class 'italic'. -->
   <xsl:template mode="html_for_whole_article" match="hi[@rendition = 'simple:italic']">
      <div class="italic">
         <xsl:apply-templates select="node()" mode="#current" />
      </div>
   </xsl:template>
   
   <!-- A reference with a target becomes an HTML link whose href is the value of @target. -->
   <xsl:template mode="html_for_whole_article" match="ref[@target]">
      <a>
         <xsl:attribute name="href" select="@target" />
         <xsl:apply-templates select="node()" mode="#current" />
      </a>
   </xsl:template>
   
</xsl:stylesheet>