Commit 7f7e1382 authored by Dennis Neumann
Browse files

Add first working XSLT script

parent 7e5cd8f3
......@@ -2,6 +2,7 @@ package sub.gfl.api;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
......@@ -22,6 +23,7 @@ public class ImporterStepConvert extends ImporterStep {
File outputDir = new File(solrXmlDir);
File inputDir = new File(gitDir);
fileAccess.cleanDir(outputDir);
out.println(" Converting TEIs to index files:");
InputStream xsltStream = ImporterStepConvert.class.getResourceAsStream("/gfl-indexer.xslt");
......@@ -30,16 +32,22 @@ public class ImporterStepConvert extends ImporterStep {
List<File> allFiles = fileAccess.getAllXmlFilesFromDir(inputDir);
Collections.sort(allFiles);
String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<add>\n"
+ "<doc>\n"
+ "<field name=\"id\">1</field>\n"
+ "<field name=\"fulltext\">mein volltext</field>\n"
+ "</doc>\n"
+ "</add>";
FileUtils.writeStringToFile(new File(outputDir, "test.xml"), xml, "UTF8");
int currentId = 1;
for (File currentFile : allFiles) {
printCurrentStatus(currentId, allFiles.size());
xslt.setParameter("currentArticleId", currentId + "");
OutputStream fileOs = fileAccess.createOutputStream(new File(solrXmlDir), currentFile.getName());
xslt.transform(currentFile.getAbsolutePath(), fileOs);
currentId++;
}
}
/**
 * Prints a progress line for every 1000th item and for the last item.
 *
 * @param currentNumber number of the item currently being processed
 * @param lastNumber number of the final item; reaching it always prints
 */
private void printCurrentStatus(int currentNumber, int lastNumber) {
    boolean atThousandStep = currentNumber % 1000 == 0;
    boolean atLastItem = currentNumber == lastNumber;
    // Stay quiet between milestones so the log is not flooded.
    if (!atThousandStep && !atLastItem) {
        return;
    }
    out.println(" ... " + currentNumber);
}
@Override
......
<?xml version="1.0" encoding="utf-8"?>
<!--
	Turns one TEI document into an <add>/<doc> index document with two
	fields: "id" (taken from the currentArticleId parameter) and "fulltext"
	(the string value of TEI/text/body).
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:gfl="http://sub.gfl.de"
	xmlns:xs="http://www.w3.org/2001/XMLSchema"
	xmlns:saxon="http://saxon.sf.net/" exclude-result-prefixes="gfl saxon xs">

	<xsl:output method="xml" indent="yes" />
	<xsl:strip-space elements="*" />

	<!-- Identifier for the generated document; set by the calling code before each transformation. -->
	<xsl:param name="currentArticleId" />

	<!-- Entry point: emits exactly one <doc> per input document. -->
	<xsl:template match="/">
		<!-- Unprefixed steps resolve in the TEI namespace via xpath-default-namespace. -->
		<xsl:variable name="articleBody" select="TEI/text/body" />
		<add>
			<doc>
				<field name="id"><xsl:value-of select="$currentArticleId" /></field>
				<field name="fulltext"><xsl:value-of select="$articleBody" /></field>
			</doc>
		</add>
	</xsl:template>

</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment