Commit 6dac6b44 authored by Dennis Neumann
Browse files

Join words that are divided between lines

parent 4d7a9009
......@@ -55,8 +55,31 @@ Furthermore, a warning message is generated that contains data of the first occu
<xsl:apply-templates select="teiHeader | text" />
</xsl:template>
<xsl:template match="text()" mode="#all">
<xsl:value-of select="replace(., '\s+', ' ')" />
<!--
  Text nodes in the HTML article mode.
  Runs of whitespace are collapsed to a single space. When the node ends
  with a soft hyphen (U+00AD), every soft hyphen in the collapsed text is
  rendered as a visible '-'; otherwise the collapsed text is emitted as is.
-->
<xsl:template match="text()" mode="html_for_whole_article">
  <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
  <xsl:value-of select="if (ends-with(., '&#0173;')) then replace($collapsed, '&#0173;', '-') else $collapsed" />
</xsl:template>
<!--
  Text nodes in the plain-text mode.
  Runs of whitespace are collapsed to a single space, then:
    * node ends with '-'            : the last character is dropped;
    * node ends with a soft hyphen  : every soft hyphen (U+00AD) in the
                                      collapsed text becomes a visible '-';
    * anything else                 : the collapsed text is emitted as is.
  The branches are tested in exactly this order.
-->
<xsl:template match="text()" mode="text_only">
  <xsl:variable name="collapsed" select="replace(., '\s+', ' ')" />
  <xsl:value-of select="
      if (ends-with(., '-'))
        then substring($collapsed, 1, string-length($collapsed) - 1)
      else if (ends-with(., '&#0173;'))
        then replace($collapsed, '&#0173;', '-')
      else $collapsed" />
</xsl:template>
<!--########### Header #######################-->
......@@ -167,7 +190,20 @@ Furthermore, a warning message is generated that contains data of the first occu
</xsl:template>
<!--
  Line break in the plain-text mode.
  Decides whether the break separates two words (emit one space) or sits
  inside a word that was divided between lines (emit nothing, so the two
  halves join). The decision looks at the nearest preceding sibling text
  node of the lb element.

  Fix: the unconditional space that used to precede the choose has been
  removed. With it in place a space was always written, so divided words
  were never joined and ordinary breaks produced two spaces.
-->
<xsl:template match="lb" mode="text_only">
  <xsl:variable name="precedingText" select="preceding-sibling::text()[1]" />
  <xsl:choose>
    <xsl:when test="ends-with($precedingText, '-')">
      <!-- Word divided between two lines: no output, the halves are joined -->
    </xsl:when>
    <xsl:when test="ends-with($precedingText, '&#0173;') and not(ends-with($precedingText, ' &#0173;'))">
      <!-- Soft hyphen (U+00AD) directly attached to a word, i.e. the hyphen
           belongs to the word (Anna-<lb/>Lena): no output.
           A soft hyphen preceded by a space is excluded here and falls
           through to the separating space below. -->
    </xsl:when>
    <xsl:otherwise>
      <!-- Ordinary line break: separate the neighbouring words -->
      <xsl:text> </xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="pb" mode="text_only">
......
......@@ -36,6 +36,13 @@ public class XsltHtmlTest {
System.out.println(outputBaos.toString());
}
/**
 * Transforms the fixture {@code invisible-softhyphen-0173.xml} and checks that the
 * article div evaluates to {@code "auf- und ab"}.
 */
@Test
public void softHyphen_convertsToMinus() throws Exception {
final String transformed = transform("invisible-softhyphen-0173.xml");
final String articleXpath = "//div[@class='article']";
assertXpathEvaluatesTo("auf- und ab", articleXpath, transformed);
}
@Test
public void datelineWithDate() throws Exception {
String html = transform("dateline-with-date.xml");
......
......@@ -35,6 +35,13 @@ public class XsltTest {
System.out.println(outputBaos.toString());
}
/**
 * Transforms the fixture {@code hyphens.xml} and checks the text of the first
 * {@code fulltext} field against the expected joined and hyphenated words.
 */
@Test
public void differentHyphensAtLinebreak() throws Exception {
final String transformed = transform("hyphens.xml");
final String expectedFulltext = "Worttrennung soft hyphen: - New sentence. Anna-Lena ";
assertXpathEvaluatesTo(expectedFulltext, "//field[@name='fulltext'][1]", transformed);
}
@Test
public void pageBreak_makesSpace() throws Exception {
String result = transform("page-break.xml");
......
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
</teiHeader>
<text xml:id="my_id" xml:lang="ger">
<body>
<div>
<p>Wort-<lb/>trennung</p>
<p>soft hyphen: ­<lb/>New sentence.</p>
<p>Anna­<lb/>Lena</p>
</div>
</body>
</text>
</TEI>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<teiHeader>
</teiHeader>
<text xml:id="my_id" xml:lang="ger">
<body>
<div>
<p>auf­<lb/> und ab</p>
</div>
</body>
</text>
</TEI>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment