Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Goethes Farbenlehre
gfl-importer
Commits
7a1dce0b
Commit
7a1dce0b
authored
Jul 10, 2019
by
Dennis Neumann
Browse files
Read languages from TEI
parent
996cdd5f
Changes
7
Hide whitespace changes
Inline
Side-by-side
gfl-plugin/src/main/resources/gfl-indexer.xslt
View file @
7a1dce0b
...
...
@@ -44,7 +44,7 @@ The second kind of documents that are produced are page documents.
The resulting pages are in the HTML format.
As the TEI file is processed, the TEI XML structure is split into pages using
the page beginning elements (<pb/>).
Refer to comment in the code to understand the used algorithm.
Refer to the comments in the code to understand the algorithm used.
-->
...
...
@@ -65,6 +65,11 @@ Refer to comment in the code to understand the used algorithm.
<add>
<doc>
<xsl:apply-templates
select=
"teiHeader | text"
/>
<xsl:for-each
select=
"distinct-values(//@xml:lang)"
>
<field
name=
"language"
>
<xsl:value-of
select=
"."
/>
</field>
</xsl:for-each>
</doc>
<xsl:apply-templates
select=
"text"
mode=
"page_splitting"
/>
</add>
...
...
gfl-plugin/src/test/java/sub/gfl/SplitTest.java
deleted
100644 → 0
View file @
996cdd5f
package
sub.gfl
;
import
static
org
.
junit
.
Assert
.*;
import
java.io.ByteArrayOutputStream
;
import
java.io.OutputStream
;
import
org.junit.After
;
import
org.junit.Before
;
import
org.junit.BeforeClass
;
import
org.junit.Test
;
import
net.sf.saxon.s9api.SaxonApiException
;
import
sub.ent.backend.Xslt
;
/**
 * Smoke test for the page-splitting stylesheet {@code split-foreach-group.xslt}.
 *
 * <p>The stylesheet is loaded once for the whole class. Every test gets a fresh
 * in-memory output buffer, and the buffer contents are printed to standard out
 * after each test so the generated XML can be inspected manually.
 */
public class SplitTest {

    /** Stylesheet under test, relative to the working directory of the test run. */
    private static final String XSLT_SCRIPT = "src/test/resources/split-foreach-group.xslt";

    /** Directory containing the TEI input snippets used by this test. */
    private static final String SNIPPET_DIR = "src/test/resources/tei-snippets-split/";

    /** Transformer shared by all tests; initialised in {@link #beforeAllTests()}. */
    private static Xslt xslt;

    /**
     * Receives the transformation output of the current test. Declared with its
     * concrete type because the tests rely on
     * {@link ByteArrayOutputStream#toString()} to read the result back.
     */
    private ByteArrayOutputStream outputBaos;

    /** Creates the transformer and points it at the stylesheet under test. */
    @BeforeClass
    public static void beforeAllTests() throws Exception {
        xslt = new Xslt();
        xslt.setXsltScript(XSLT_SCRIPT);
    }

    /** Gives every test an empty output buffer. */
    @Before
    public void beforeEachTest() throws Exception {
        outputBaos = new ByteArrayOutputStream();
    }

    /** Dumps whatever the test produced, for manual inspection. */
    @After
    public void afterEachTest() {
        System.out.println(outputBaos.toString());
    }

    /**
     * Runs the stylesheet on a snippet with two page beginnings.
     *
     * <p>There are no assertions: the test only fails if the transformation
     * throws. The output is printed by {@link #afterEachTest()}.
     */
    @Test
    public void testGroup() throws Exception {
        transform("two-page-beginnings.xml");
    }

    /**
     * Transforms one snippet from the snippet directory into the output buffer.
     *
     * @param fileName name of the snippet file inside the snippet directory
     * @return the contents of the output buffer after the transformation
     * @throws SaxonApiException if the transformation fails
     */
    private String transform(String fileName) throws SaxonApiException {
        xslt.transform(SNIPPET_DIR + fileName, outputBaos);
        return outputBaos.toString();
    }
}
gfl-plugin/src/test/java/sub/gfl/XsltTest.java
View file @
7a1dce0b
...
...
@@ -35,6 +35,22 @@ public class XsltTest {
System
.
out
.
println
(
outputBaos
.
toString
());
}
/**
 * A TEI file that uses two different {@code xml:lang} values must yield exactly
 * one "language" field per distinct value.
 *
 * <p>The fields are checked by value, not by position: the indexer stylesheet
 * generates them from {@code distinct-values(//@xml:lang)}, and the order of
 * the sequence returned by that function is implementation-dependent.
 */
@Test
public void twoDifferentLanguages() throws Exception {
    String result = transform("language_twoDifferentEntries.xml");

    // Exactly two fields overall, one per distinct language code.
    assertXpathEvaluatesTo("2", "count(//field[@name='language'])", result);
    assertXpathEvaluatesTo("1", "count(//field[@name='language'][. = 'ger'])", result);
    assertXpathEvaluatesTo("1", "count(//field[@name='language'][. = 'eng'])", result);
}
/**
 * A TEI file with a single {@code xml:lang} value must expose that value in
 * the "language" field of the transformation result.
 */
@Test
public void oneLanguage() throws Exception {
    assertXpathEvaluatesTo(
            "ger",
            "//field[@name='language']",
            transform("language.xml"));
}
@Test
public
void
spaceAfterAddressLine
()
throws
Exception
{
String
result
=
transform
(
"address-in-opener.xml"
);
...
...
gfl-plugin/src/test/resources/split-foreach-group.xslt
deleted
100644 → 0
View file @
996cdd5f
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0">

  <!--
    Splits the content of each TEI <div> into pages at its page-beginning
    elements (<pb/>) by means of xsl:for-each-group, and emits every page as
    an "html_page" field inside a single <add>/<doc> wrapper.
  -->

  <xsl:output method="xml" indent="yes"/>

  <!-- Entry point: wrap everything produced for the TEI root in <add><doc>. -->
  <xsl:template match="/">
    <add>
      <doc>
        <xsl:apply-templates select="TEI"/>
      </doc>
    </add>
  </xsl:template>

  <!-- Only the body of the text is processed; the header is not selected. -->
  <xsl:template match="TEI">
    <xsl:apply-templates select="text/body"/>
  </xsl:template>

  <!-- Hand all children of the body to the matching templates. -->
  <xsl:template match="body">
    <xsl:apply-templates/>
  </xsl:template>

  <!--
    Page splitting for one <div>.

    All leaf nodes below the div (nodes without children) are grouped so that
    each group starts at a <pb>. For every such group the children of the div
    are re-processed in mode "split", restricted to the nodes of the group and
    their ancestors, so only the part of the tree belonging to that page is
    written out.
  -->
  <xsl:template match="div">
    <xsl:variable name="splitRoot" select="."/>

    <xsl:for-each-group select="descendant::node()[not(node())]"
                        group-starting-with="pb">
      <!-- A leading group that does not start with a <pb> is skipped. -->
      <xsl:if test="self::pb">
        <field name="html_page">
          <div class="page">
            <div class="page-beginning">
              <!-- 1-based position of this <pb> in the document, then its @n. -->
              <xsl:value-of select="count(preceding::pb) + 1"/>
              <xsl:value-of select="@n"/>
            </div>
            <xsl:apply-templates select="$splitRoot/*" mode="split">
              <xsl:with-param name="restricted-to"
                              select="current-group()/ancestor-or-self::node()"
                              tunnel="yes"/>
            </xsl:apply-templates>
          </div>
        </field>
      </xsl:if>
    </xsl:for-each-group>
  </xsl:template>

  <!--
    Paragraphs and names become <div class="p"> / <div class="name">, but only
    if the element itself is part of the current page's node set.
  -->
  <xsl:template match="p | name" mode="split">
    <xsl:param name="restricted-to" tunnel="yes"/>
    <xsl:if test="exists(. intersect $restricted-to)">
      <div class="{local-name(.)}">
        <xsl:apply-templates mode="split"/>
      </div>
    </xsl:if>
  </xsl:template>

  <!-- Text is copied only when it belongs to the current page's node set. -->
  <xsl:template match="text()" mode="split">
    <xsl:param name="restricted-to" tunnel="yes"/>
    <xsl:if test="exists(. intersect $restricted-to)">
      <xsl:copy/>
    </xsl:if>
  </xsl:template>

  <!-- Page beginnings produce no output of their own in split mode. -->
  <xsl:template match="pb" mode="split"/>

</xsl:stylesheet>
\ No newline at end of file
gfl-plugin/src/test/resources/tei-snippets/language.xml
0 → 100644
View file @
7a1dce0b
<?xml version="1.0" encoding="UTF-8"?>
<TEI
xmlns=
"http://www.tei-c.org/ns/1.0"
>
<teiHeader>
</teiHeader>
<text
xml:id=
"my_id"
xml:lang=
"ger"
>
<body>
<div>
<p>
Test text.
</p>
</div>
</body>
</text>
</TEI>
\ No newline at end of file
gfl-plugin/src/test/resources/tei-snippets/language_twoDifferentEntries.xml
0 → 100644
View file @
7a1dce0b
<?xml version="1.0" encoding="UTF-8"?>
<TEI
xmlns=
"http://www.tei-c.org/ns/1.0"
>
<teiHeader>
</teiHeader>
<text
xml:id=
"my_id"
xml:lang=
"ger"
>
<body>
<div
xml:lang=
"ger"
>
<p
xml:lang=
"eng"
>
Test text.
</p>
</div>
</body>
</text>
</TEI>
\ No newline at end of file
solr/gfl/conf/schema.xml
View file @
7a1dce0b
...
...
@@ -56,6 +56,7 @@
<!-- for doctype 'article' -->
<field
name=
"number_of_pages"
type=
"int"
/>
<field
name=
"language"
type=
"string"
multiValued=
"true"
/>
<field
name=
"fulltext"
type=
"text_de"
multiValued=
"false"
/>
<field
name=
"fulltext_html"
type=
"text_de"
multiValued=
"false"
/>
<field
name=
"short_title"
type=
"text_de"
multiValued=
"false"
/>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment