<?xml version="1.0" encoding="utf-8"?>

Dennis Neumann's avatar
Dennis Neumann committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
<!-- 

This script produces Solr XML documents.


Field 'fulltext_html'

This field contains the HTML representation of the text of a TEI document (e.g. a Goethe letter).
The Goethe letters are composed of different parts, for example 'opener', 'closer', 'salute'.
All those parts are represented here as <div>'s with the corresponding CSS classes.
The frontend viewer must decide how to format those parts and present them to the user.

Also, the original TEI files contain mark-up for many in-text parts, like dates, names, underlined words, etc.
Most of these are also transformed to <div>'s with their own CSS classes.
Although the in-text parts are by nature inline elements, we use here <div>'s and not <span>'s.
The reason is that Solr seems to have problems when highlighting fields that contain <span>'s
by sometimes producing corrupt HTML.
By using <div>'s, we avoid this problem.
In the frontend, these <div>'s must be set to 'display: inline'.

Some other in-text parts are transformed to special HTML elements.
For example, superscripted text is marked as <sup>, because HTML offers the appropriate element.

The project is still continuing and new TEI files are being produced.
That's why there might be new elements in the future that cannot be handled yet in this script.
The text of such TEI elements is enclosed in HTML elements of class 'unknown-element'.
Furthermore, a warning message is generated that contains data of the first occurrence of such a new element.






 -->

Dennis Neumann's avatar
Dennis Neumann committed
38
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
   xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:gfl="http://sub.gfl.de"
   xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:saxon="http://saxon.sf.net/" exclude-result-prefixes="gfl saxon xs">
   <!-- Unprefixed element names in match/select expressions refer to the TEI namespace
        (xpath-default-namespace); the gfl, saxon and xs prefixes are kept out of the result. -->

Dennis Neumann's avatar
Dennis Neumann committed
42
   <!-- Indented XML output; the Saxon extension attribute switches indentation off inside
        <div> elements so that the generated HTML is not altered by the serializer. -->
   <xsl:output method="xml" indent="yes" saxon:suppress-indentation="div" />
   <!-- Whitespace-only text nodes of the input are dropped everywhere ... -->
   <xsl:strip-space elements="*" />
   <!-- ... except directly inside these elements. -->
   <xsl:preserve-space elements="msIdentifier bibl p" />
45
46
47
48

   <!-- Entry point: wraps everything produced for the TEI root element
        in a Solr <add><doc> envelope. -->
   <xsl:template match="/">
      <add>
         <doc>
            <xsl:apply-templates select="TEI" />
         </doc>
      </add>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
53

Dennis Neumann's avatar
Dennis Neumann committed
54
55
56
57
58
   <!-- Processes the two top-level parts of a TEI document, in document order:
        the header (metadata fields) and the text (id and full text fields). -->
   <xsl:template match="TEI">
      <xsl:apply-templates select="*[self::teiHeader or self::text]" />
   </xsl:template>

   <!-- Text nodes, in every mode: each run of whitespace (including line breaks
        from the source file) is collapsed to a single space. Leading and trailing
        whitespace is collapsed as well, but not removed. -->
   <xsl:template match="text()" mode="#all">
      <xsl:value-of select="replace(., '\s+', ' ')" />
   </xsl:template>
   
   <!--###########   Header   #######################-->
   
   <!-- Only the file description and the text classification (keywords)
        of the header are turned into Solr fields. -->
   <xsl:template match="teiHeader">
      <xsl:apply-templates select="fileDesc | profileDesc/textClass" />
   </xsl:template>
   
   <!-- Emits the metadata fields that come from the file description:
        titles, places/recipient and origin date found inside the descriptive title,
        the author, and the source description.
        NOTE(review): this stylesheet only has templates for title/@type 'short' and 'desc'
        and for the name/@type + @subtype combinations matched further down; any other
        title, name or author/name selected here falls back to the built-in rules, which
        would write its text directly into <doc>. Confirm that such input cannot occur. -->
   <xsl:template match="fileDesc">
      <xsl:apply-templates select="titleStmt/title" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/name" />
      <xsl:apply-templates select="titleStmt/title[@type='desc']/date[@type='orn']" />
      <xsl:apply-templates select="titleStmt/author/name" />
      <xsl:apply-templates select="sourceDesc" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
76
77
78
79
   <!-- Hands every keyword term over to the scheme-specific keyword templates.
        NOTE(review): only the schemes '#gnd' and 'free' have a template in this file;
        terms of other schemes would be handled by the built-in rules. -->
   <xsl:template match="profileDesc/textClass">
      <xsl:apply-templates select="child::keywords/child::term" />
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
80
   <!-- Short title of the document, as plain text without mark-up. -->
   <xsl:template match="title[@type='short']">
      <field name="short_title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
86
   <!-- Descriptive title of the document, as plain text without mark-up. -->
   <xsl:template match="title[@type='desc']">
      <field name="title">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
92
   <!-- Place name with subtype 'orn' inside a title: indexed as the place of origin. -->
   <xsl:template match="title/name[@type='place' and @subtype='orn']">
      <field name="origin_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
98
   <!-- Place name with subtype 'dtn' inside a title: indexed as the destination place. -->
   <xsl:template match="title/name[@type='place' and @subtype='dtn']">
      <field name="destination_place">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
104
   <!-- Person name with subtype 'rcp' inside a title: indexed as the recipient. -->
   <xsl:template match="title/name[@type='person' and @subtype='rcp']">
      <field name="recipient">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
110
   <!-- Origin date: the value of the @when attribute is indexed,
        not the text content of the date element. -->
   <xsl:template match="title/date[@type='orn']">
      <field name="origin_date">
         <xsl:value-of select="@when" />
      </field>
   </xsl:template>

Dennis Neumann's avatar
Dennis Neumann committed
116
   <!-- Person name with subtype 'aut' inside author: indexed as the author. -->
   <xsl:template match="author/name[@type='person' and @subtype='aut']">
      <field name="author">
         <xsl:apply-templates mode="text_only" />
      </field>
   </xsl:template>
121
122
123
124
125
126
   
   <!-- The complete source description is flattened to plain text
        and stored in a single field. -->
   <xsl:template match="sourceDesc">
      <field name="source_description">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
127

Dennis Neumann's avatar
Dennis Neumann committed
128
129
130
131
132
133
134
135
136
137
138
139
140
   <!-- One 'gnd_keyword' field per term of a keyword list with scheme '#gnd'. -->
   <xsl:template match="textClass/keywords[@scheme='#gnd']/term">
      <field name="gnd_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>

   <!-- One 'free_keyword' field per term of a keyword list with scheme 'free'. -->
   <xsl:template match="textClass/keywords[@scheme='free']/term">
      <field name="free_keyword">
         <xsl:apply-templates select="node()" mode="text_only" />
      </field>
   </xsl:template>


141

Dennis Neumann's avatar
Dennis Neumann committed
142
143
144
145
146
147
148
   <!--###################   text/body   ##########################-->

   <!-- Produces three fields from the TEI text element:
          id             the @xml:id of the text element
          fulltext       the body as plain text (mode text_only)
          fulltext_html  the content as HTML mark-up (mode html_for_whole_article)
        The HTML is written as mark-up and wrapped in a CDATA section by emitting the
        CDATA delimiters with disable-output-escaping, so the field value is the HTML
        source as a string. This depends on the serializer honouring
        disable-output-escaping.
        NOTE(review): a literal "]]>" in the generated HTML would end the CDATA section
        early; presumably this does not occur in the source texts. -->
   <xsl:template match="text">
      <field name="id">
         <xsl:value-of select="@xml:id" />
      </field>
      <field name="fulltext">
         <xsl:apply-templates select="body" mode="text_only" />
      </field>
      <field name="fulltext_html">
         <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
            <div class="article">
               <xsl:apply-templates mode="html_for_whole_article" />
            </div>
         <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
      </field>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
159
160
   
   <!-- Structural containers contribute nothing themselves in plain-text mode;
        only their children are processed.
        NOTE(review): apart from p, lb, pb and space no template in this mode adds a
        separating space, so the text of adjacent elements without whitespace between
        them is joined directly. Confirm this is acceptable for the search field. -->
   <xsl:template match="body | div" mode="text_only">
      <xsl:apply-templates mode="text_only"/>
   </xsl:template>

   <!-- Plain text of a paragraph, followed by one space so that the last word
        does not run into whatever follows. -->
   <xsl:template match="p" mode="text_only">
      <xsl:apply-templates select="node()" mode="text_only" />
      <xsl:value-of select="' '" />
   </xsl:template>
168

169
   <!-- A line break becomes a single space in the plain text. -->
   <xsl:template match="lb" mode="text_only">
      <xsl:text> </xsl:text>
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
172

173
174
175
176
177
178
179
180
   <!-- A page break becomes a single space in the plain text. -->
   <xsl:template match="pb" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

   <!-- Any space element, whatever its unit or quantity, becomes a single space
        in the plain text. -->
   <xsl:template match="space" mode="text_only">
      <xsl:value-of select="' '" />
   </xsl:template>

181
   <!-- Intentionally empty: notes of type 'com' are left out of the plain text. -->
   <xsl:template match="note[@type='com']" mode="text_only" />
Dennis Neumann's avatar
Dennis Neumann committed
183
184
185
   
   
   <!-- ++++++++++++ HTML +++++++++++++++++++ -->
186
187
188
189
190
191

   <!-- Fallback for every element that has no dedicated HTML template.
        Elements without any text content are dropped silently. For all others a
        warning is written via xsl:message and the content is wrapped in an element
        of class 'unknown-element'.
        The warning names the element, its @rendition and @type (if present), the
        beginning of its text and the @xml:id of the enclosing text element, so that
        the occurrence can be located in the source file.
        The message is produced for each matching element, not only for the first one.
        NOTE(review): the stylesheet header explains that inline parts are emitted as
        <div> because <span> caused problems with Solr highlighting, yet this fallback
        emits <span>. Left unchanged here; confirm with the frontend before switching. -->
   <xsl:template match="*" mode="html_for_whole_article">
      <xsl:if test=".//text()">
         <xsl:message>
            <xsl:text>Unknown element &lt;</xsl:text>
            <xsl:value-of select="local-name()" />
            <xsl:if test="@rendition">
               <xsl:text> rendition="</xsl:text>
               <xsl:value-of select="@rendition" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:if test="@type">
               <xsl:text> type="</xsl:text>
               <xsl:value-of select="@type" />
               <xsl:text>"</xsl:text>
            </xsl:if>
            <xsl:text>&gt; - first occurrence: </xsl:text>
            <!-- At most 80 characters of the element's text keep the log line short. -->
            <xsl:value-of select="substring(normalize-space(.), 1, 80)" />
            <xsl:text> (document id: </xsl:text>
            <xsl:value-of select="ancestor::text[1]/@xml:id" />
            <xsl:text>)</xsl:text>
         </xsl:message>
         <span class="unknown-element">
            <xsl:apply-templates mode="html_for_whole_article" />
         </span>
      </xsl:if>
   </xsl:template>
   
   <!-- Structural containers produce no HTML element of their own;
        only their children are rendered. -->
   <xsl:template match="body | div" mode="html_for_whole_article">
      <xsl:apply-templates select="node()" mode="html_for_whole_article" />
   </xsl:template>
Dennis Neumann's avatar
Dennis Neumann committed
213
   
Dennis Neumann's avatar
Dennis Neumann committed
214
215
   <!-- Letter parts and in-text parts that map one-to-one to a CSS class:
        each becomes a <div> whose class is the TEI element name. -->
   <xsl:template match="opener | salute | seg | bibl | closer | signed | dateline | date" mode="html_for_whole_article">
      <div class="{local-name()}">
         <xsl:apply-templates mode="html_for_whole_article" />
      </div>
   </xsl:template>

   <!-- A TEI line break is rendered as an HTML line break. -->
   <xsl:template match="lb" mode="html_for_whole_article">
      <xsl:element name="br" />
   </xsl:template>
223

224
225
   <!-- Vertical space measured in lines: one <br/> per empty line.
        If @quantity is missing or is not an integer, a single <br/> is emitted.
        An integer below 1 yields an empty range and therefore no <br/> at all. -->
   <xsl:template match="space[@unit='lines']" mode="html_for_whole_article">
      <xsl:variable name="emptyLines" select="@quantity" />
      <xsl:choose>
         <xsl:when test="$emptyLines castable as xs:integer">
            <xsl:for-each select="1 to xs:integer($emptyLines)">
               <br />
            </xsl:for-each>
         </xsl:when>
         <xsl:otherwise>
            <br />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:template>

   <!-- A TEI paragraph is rendered as an HTML <article> element. -->
   <xsl:template match="p" mode="html_for_whole_article">
      <article>
         <xsl:apply-templates mode="html_for_whole_article" />
      </article>
   </xsl:template>

   <!-- Place names: <div class="place">. The class is taken from @type,
        which the match pattern fixes to 'place'. -->
   <xsl:template match="name[@type='place']" mode="html_for_whole_article">
      <div class="{@type}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
250
251
252
253
254
255
256
257
258
259
260
261
   <!-- Organisation names: <div class="org">. The class is taken from @type,
        which the match pattern fixes to 'org'. -->
   <xsl:template match="name[@type='org']" mode="html_for_whole_article">
      <div class="{@type}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Person names: <div class="person">. The class is taken from @type,
        which the match pattern fixes to 'person'. -->
   <xsl:template match="name[@type='person']" mode="html_for_whole_article">
      <div class="{@type}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
262
263
264
265
266
267
   <!-- Object names: <div class="object">. The class is taken from @type,
        which the match pattern fixes to 'object'. -->
   <xsl:template match="name[@type='object']" mode="html_for_whole_article">
      <div class="{@type}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
268
269
   <!-- A page break is rendered as <div class="page-break"> containing
        the value of the @n attribute of the pb element. -->
   <xsl:template match="pb" mode="html_for_whole_article">
      <div class="page-break">
         <xsl:value-of select="@n" />
      </div>
   </xsl:template>
   
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
   <!-- Referring strings of type 'person': <div class="rs-person">. The second half
        of the class is taken from @type, which the match pattern fixes to 'person'. -->
   <xsl:template match="rs[@type='person']" mode="html_for_whole_article">
      <div class="rs-{@type}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Notes of type 'com' are kept in the HTML and wrapped in
        <div class="note-comment">. -->
   <xsl:template match="note[@type='com']" mode="html_for_whole_article">
      <div class="note-comment">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Underlined text: <div class="underline">. The class is the part of
        @rendition after the 'simple:' prefix, which the match pattern fixes. -->
   <xsl:template match="hi[@rendition='simple:underline']" mode="html_for_whole_article">
      <div class="{substring-after(@rendition, 'simple:')}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- Superscripted text uses the dedicated HTML element <sup>
        instead of a <div> with a CSS class. -->
   <xsl:template match="hi[@rendition='simple:superscript']" mode="html_for_whole_article">
      <sup>
         <xsl:apply-templates mode="html_for_whole_article" />
      </sup>
   </xsl:template>
   
   <!-- Italic text: <div class="italic">. The class is the part of
        @rendition after the 'simple:' prefix, which the match pattern fixes. -->
   <xsl:template match="hi[@rendition='simple:italic']" mode="html_for_whole_article">
      <div class="{substring-after(@rendition, 'simple:')}">
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </div>
   </xsl:template>
   
   <!-- References that carry a target become HTML links;
        the target value is copied unchanged into href. -->
   <xsl:template match="ref[@target]" mode="html_for_whole_article">
      <a>
         <xsl:attribute name="href" select="@target" />
         <xsl:apply-templates select="node()" mode="html_for_whole_article" />
      </a>
   </xsl:template>
   
Dennis Neumann's avatar
Dennis Neumann committed
310
</xsl:stylesheet>