From 3ece7cae0602c4386a367be815cc5f5c9eff304d Mon Sep 17 00:00:00 2001
From: Thorsten Vitt <thorsten.vitt@uni-wuerzburg.de>
Date: Fri, 13 Dec 2019 15:02:47 +0100
Subject: [PATCH] Plain text support for the aggregator

---
 .../textgrid/services/aggregator/REST.java    | 19 +++++++
 .../aggregator/text/PlainTextWriter.java      | 49 +++++++++++++++++++
 .../services/aggregator/PlainTextIT.java      | 38 ++++++++++++++
 3 files changed, 106 insertions(+)
 create mode 100644 src/main/java/info/textgrid/services/aggregator/text/PlainTextWriter.java
 create mode 100644 src/test/java/info/textgrid/services/aggregator/PlainTextIT.java

diff --git a/src/main/java/info/textgrid/services/aggregator/REST.java b/src/main/java/info/textgrid/services/aggregator/REST.java
index 1143718..a0dec12 100644
--- a/src/main/java/info/textgrid/services/aggregator/REST.java
+++ b/src/main/java/info/textgrid/services/aggregator/REST.java
@@ -21,6 +21,7 @@
 import javax.ws.rs.core.StreamingOutput;
 import javax.ws.rs.core.UriInfo;
 
+import info.textgrid.services.aggregator.text.PlainTextWriter;
 import org.apache.cxf.jaxrs.model.wadl.Description;
 import org.apache.cxf.jaxrs.model.wadl.Descriptions;
 import org.apache.cxf.jaxrs.model.wadl.DocTarget;
@@ -150,6 +151,30 @@ public Response getHTML(
 		return writer.createResponse().build();
 	}
 
+	/**
+	 * Serves the requested TextGrid object(s) as a plain text document.
+	 *
+	 * <p>Hands the request over to a {@link PlainTextWriter}, passes the session
+	 * ID and the sandbox flag on to it and returns the response it creates.
+	 */
+	@GET
+	@Path(value = "/text/{object}")
+	@Produces(value = "text/plain")
+	@Descriptions({
+			@Description(target = DocTarget.METHOD, value="Generates plain text output."),
+			@Description(target = DocTarget.RETURN, value = "A plain text document.")
+	})
+	public Response getText(
+			@Description("The TextGrid URIs of the TEI document(s) or aggregation(s) to transform, separated by commas") @PathParam("object") final String uriList,
+			@Description("Session ID to access protected resources") @QueryParam("sid") final String sid,
+			@Description("Also access sandboxed data") @QueryParam("sandbox") final boolean sandbox,
+			@Context final Request request) throws IoFault, AuthFault, IOException, SaxonApiException, ObjectNotFoundFault, MetadataParseFault, ProtocolNotImplementedFault {
+		final PlainTextWriter textWriter = new PlainTextWriter(repository, getStylesheetManager(), uriList, request);
+		textWriter.sid(sid);
+		textWriter.sandbox(sandbox);
+		return textWriter.createResponse().build();
+	}
+
 	@GET
 	@Path(value = "/zip/{objects}")
 	@Produces("application/zip")
diff --git a/src/main/java/info/textgrid/services/aggregator/text/PlainTextWriter.java b/src/main/java/info/textgrid/services/aggregator/text/PlainTextWriter.java
new file mode 100644
index 0000000..200ac8d
--- /dev/null
+++ b/src/main/java/info/textgrid/services/aggregator/text/PlainTextWriter.java
@@ -0,0 +1,77 @@
+package info.textgrid.services.aggregator.text;
+
+import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.*;
+import info.textgrid.services.aggregator.ITextGridRep;
+import info.textgrid.services.aggregator.html.TGUriResolver;
+import info.textgrid.services.aggregator.teicorpus.CorpusBasedExporter;
+import info.textgrid.services.aggregator.util.StylesheetManager;
+import net.sf.saxon.s9api.SaxonApiException;
+import net.sf.saxon.s9api.Serializer;
+import net.sf.saxon.s9api.XsltTransformer;
+
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Request;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+
+/**
+ * Exporter that produces plain text by running the TEI-to-text stylesheet
+ * over the source obtained from {@code loadSource}.
+ */
+public class PlainTextWriter extends CorpusBasedExporter {
+    /** Location of the TEI-to-text stylesheet, as handed to the {@link StylesheetManager}. */
+    private static final URI TO_TEXT_XSL = URI.create("/tei-stylesheets/txt/tei-to-text.xsl");
+
+    /** Character encoding announced in the media type and enforced on the serializer. */
+    private static final String OUTPUT_ENCODING = "UTF-8";
+
+    private final StylesheetManager stylesheetManager;
+
+    /**
+     * Creates a plain text writer and sets its media type and file extension.
+     *
+     * @param repository        repository handle, passed on to the superclass
+     * @param stylesheetManager source of the stylesheet and of the XSLT processor
+     * @param uriList           the URI list, passed on to the superclass
+     * @param request           the current request, passed on to the superclass
+     */
+    public PlainTextWriter(ITextGridRep repository, StylesheetManager stylesheetManager, String uriList, Request request) {
+        super(repository, request, uriList);
+        this.stylesheetManager = stylesheetManager;
+        setMediaType(MediaType.TEXT_PLAIN + "; charset=" + OUTPUT_ENCODING);
+        setFileExtension(".txt");
+    }
+
+    /**
+     * Transforms the loaded source with the text stylesheet and serializes the result to {@code output}.
+     *
+     * @param output stream that receives the serialized text
+     * @throws WebApplicationException with status 404 if an object was not found,
+     *         with status 403 on an authorization fault, and with the JAX-RS default
+     *         status for any other repository or transformation failure
+     */
+    @Override
+    public void write(OutputStream output) throws IOException, WebApplicationException {
+        try {
+            final XsltTransformer transformer = stylesheetManager.getStylesheet(TO_TEXT_XSL, getSid(), false, true).load();
+            if (getSid().isPresent()) {
+                transformer.setURIResolver(new TGUriResolver(repository, getSid()));
+            }
+            transformer.setSource(loadSource(false));
+            final Serializer serializer = stylesheetManager.xsltProcessor.newSerializer(output);
+            // The media type promises UTF-8, so pin the encoding here instead of
+            // leaving it to whatever the stylesheet's xsl:output declares.
+            serializer.setOutputProperty(Serializer.Property.ENCODING, OUTPUT_ENCODING);
+            transformer.setDestination(serializer);
+            transformer.transform();
+        } catch (SaxonApiException | IoFault | MetadataParseFault | ProtocolNotImplementedFault e) {
+            throw new WebApplicationException(e);
+        } catch (ObjectNotFoundFault objectNotFoundFault) {
+            throw new WebApplicationException(objectNotFoundFault, 404);
+        } catch (AuthFault authFault) {
+            throw new WebApplicationException(authFault, 403);
+        }
+    }
+}
diff --git a/src/test/java/info/textgrid/services/aggregator/PlainTextIT.java b/src/test/java/info/textgrid/services/aggregator/PlainTextIT.java
new file mode 100644
index 0000000..47b3fd0
--- /dev/null
+++ b/src/test/java/info/textgrid/services/aggregator/PlainTextIT.java
@@ -0,0 +1,47 @@
+package info.textgrid.services.aggregator;
+
+import org.apache.commons.io.IOUtils;
+import org.hamcrest.CoreMatchers;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+
+import static org.junit.Assert.*;
+import static org.junit.Assume.*;
+import static org.hamcrest.CoreMatchers.*;
+
+
+/**
+ * Integration test for the plain text export served below {@code /text/}.
+ */
+public class PlainTextIT extends AbstractIntegrationTest {
+
+    /**
+     * Requests {@code textgrid:qdnf.0} as plain text and checks the status code,
+     * the content type and the complete response body.
+     */
+    @Test
+    public void testText() throws IOException {
+        final HttpURLConnection connection = createRequest("/text/textgrid:qdnf.0");
+        connection.connect();
+        assertThat(connection.getResponseCode(), is(200));
+        assertThat(connection.getContentType(), is("text/plain;charset=UTF-8"));
+        // Decode explicitly as UTF-8: the expected text contains non-ASCII characters,
+        // and the charset-less IOUtils.toString uses the platform default encoding.
+        assertThat(IOUtils.toString(connection.getInputStream(), "UTF-8"),
+                CoreMatchers.<Object>is("Den Pessimisten\n" +
+                "Ghasel\n" +
+                "Solang uns Liebe lockt mit Lust und Plagen,\n" +
+                "Solang Begeistrung wechselt und Verzagen,\n" +
+                "Solange wird auf Erden nicht die Zeit,\n" +
+                "Die schreckliche, die dichterlose tagen:\n" +
+                "Solang in tausend Formen Schönheit blüht,\n" +
+                "Schlägt auch ein Herz, zu singen und zu sagen,\n" +
+                "Solang das Leid, das ewge, uns umflicht,\n" +
+                "Solange werden wirs in Tönen klagen,\n" +
+                "Und es erlischt erst dann der letzte Traum,\n" +
+                "Wenn er das letzte Herz zu Gott getragen!\n"));
+    }
+}
-- 
GitLab