Commit d97c0b21 authored by thorsten.vitt's avatar thorsten.vitt
Browse files

Merge branch 'release/1.4.0'

parents 880c9be4 16e49fa2
......@@ -33,5 +33,10 @@
<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/webapp/WEB-INF">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
/.settings
/.classpath
/target
src/main/resources/git.properties
[submodule "tei-stylesheets"]
path = src/main/webapp/WEB-INF/tei-stylesheets
url = https://github.com/TEIC/Stylesheets.git
Goals
=====
Basically, we would like to have an interface as follows:
* the REST interface should live in a small single class that then instantiates
the workers, so we could also set up a SOAP interface if we want
* most of the argument handling etc. should be the same for all types of
export, so reuse it
* usually, request processing consists of the following steps:
1. argument parsing & (offline) validation
2. argument validation (online), e.g. check whether referred objects
actually exist. Usually this involves fetching at least one object's
metadata in order to determine its format & potentially title for
the results.
3. Result header generation
4. Result body generation.
The first three steps should be as fast as possible, i.e. involve as little repository
interaction as possible, so we can reply quickly. The fourth step will usually
be deferred to the time data is streamed (StreamingOutput's write method)
* There are typical recurring tasks:
* caching of stylesheets
* calculating Cache headers
* processing trees of documents
* generating filenames both for the result & for parts of it
* generating TEIcorpus documents
* rewriting links
*
......@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>info.textgrid.services</groupId>
<artifactId>aggregator</artifactId>
<version>1.2-SNAPSHOT</version>
<version>1.4.0</version>
<packaging>war</packaging>
<name>TextGrid Aggregator Service</name>
......@@ -17,42 +17,36 @@
<url>http://www.thorstenvitt.de/</url>
<organization>Universität Würzburg</organization>
<organizationUrl>http://www.germanistik.uni-wuerzburg.de/lehrstuehle/computerphilologie</organizationUrl>
<roles> </roles>
</developer>
</developers>
<properties>
<commons-io-version>2.4</commons-io-version>
<link-rewriter-version>0.2.1-SNAPSHOT</link-rewriter-version>
<saxon-version>9.4.0.7</saxon-version>
<cxf-version>2.7.4</cxf-version>
<confclient-version>1.0-SNAPSHOT</confclient-version>
<tgsearch-version>2.1.0-SNAPSHOT</tgsearch-version>
<tgcrud-version>2.4.0-SNAPSHOT</tgcrud-version>
<guava-version>14.0.1</guava-version>
<link-rewriter-version>0.4.0-SNAPSHOT</link-rewriter-version>
<saxon-version>9.4.0.7</saxon-version> <!-- XXX mind dependency from epubcheck -->
<cxf-version>2.7.11</cxf-version>
<confclient-version>1.4.0</confclient-version>
<tgsearch-version>3.0.2-SNAPSHOT</tgsearch-version>
<tgcrud-version>2.6.0</tgcrud-version>
<guava-version>15.0</guava-version>
<junit-version>4.11</junit-version>
<spring-version>3.2.2.RELEASE</spring-version>
<tei-xsl-version>6.17</tei-xsl-version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- The following properties are used for configuring the
web service and can be overridden in profiles -->
<aggregator.endpoint.published>http://localhost:13000/aggregator</aggregator.endpoint.published>
<aggregator.textgridrep.default>https://www.textgridlab.org/1.0/confserv</aggregator.textgridrep.default>
<aggregator.textgridrep.dev>https://www.textgridlab.org/dev/confserv</aggregator.textgridrep.dev>
<aggregator.classifier/>
</properties>
<repositories>
<repository>
<id>bibforge.internal.http</id>
<name>Bibforge Managed Internal Repository</name>
<url>http://repository.bibforge.org/archiva/repository/internal</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>bibforge.snapshots.http</id>
<name>Bibforge Managed Snapshot Repository</name>
<url>http://repository.bibforge.org/archiva/repository/snapshots</url>
<id>nexus.dariah</id>
<name>DARIAH Nexus Public Repository</name>
<url>http://dev.dariah.eu/nexus/content/groups/public</url>
<releases>
<enabled>true</enabled>
</releases>
......@@ -139,6 +133,11 @@
<artifactId>cxf-rt-transports-http</artifactId>
<version>${cxf-version}</version>
</dependency>
<dependency>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-management</artifactId>
<version>${cxf-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
......@@ -154,6 +153,18 @@
<artifactId>icu4j</artifactId>
<version>51.1</version>
</dependency>
<dependency>
<groupId>org.idpf</groupId>
<artifactId>epubcheck</artifactId>
<version>4.0.0-alpha3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>xmlunit</groupId>
<artifactId>xmlunit</artifactId>
<version>1.5</version>
<scope>test</scope>
</dependency>
</dependencies>
......@@ -161,9 +172,9 @@
<pluginManagement>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>tomcat-maven-plugin</artifactId>
<version>1.1</version>
<groupId>org.apache.tomcat.maven</groupId>
<artifactId>tomcat6-maven-plugin</artifactId>
<version>2.1</version>
<executions>
<execution>
<id>default-cli</id>
......@@ -224,27 +235,6 @@
<ignore/>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
com.github.goldin
</groupId>
<artifactId>
copy-maven-plugin
</artifactId>
<versionRange>
[0.2.5,)
</versionRange>
<goals>
<goal>
copy
</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
......@@ -252,6 +242,28 @@
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.5</version>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
</execution>
</executions>
<configuration>
<verbose>true</verbose>
<dateFormat>yyyy-MM-dd HH:mm:ss</dateFormat>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
<dirty>-dev</dirty>
<abbrev>7</abbrev>
</gitDescribe>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
......@@ -281,8 +293,8 @@
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>tomcat-maven-plugin</artifactId>
<groupId>org.apache.tomcat.maven</groupId>
<artifactId>tomcat6-maven-plugin</artifactId>
<executions>
<execution>
<id>start-tomcat</id>
......@@ -334,47 +346,13 @@
</executions>
</plugin>
<!-- Following two executions are used to download the specified version
of the TEI stylesheets and unzip them to the WEB-INF folder of the
target WAR
-->
<plugin>
<groupId>com.github.goldin</groupId>
<artifactId>copy-maven-plugin</artifactId>
<version>0.2.5</version>
<executions>
<execution>
<id>download-tei-xsl</id>
<phase>generate-resources</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<resources>
<resource>
<file>http://downloads.sourceforge.net/project/tei/Stylesheets/tei-xsl-${tei-xsl-version}.zip</file>
<targetPath>${project.build.directory}/${project.artifactId}-${project.version}/WEB-INF</targetPath>
<unpack>true</unpack>
<preservePath>true</preservePath>
<zipEntry>xml/**/*</zipEntry>
<skipIdentical>true</skipIdentical>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.14</version>
<configuration>
<systemPropertyVariables>
<webapp.directory>${project.build.directory}/${project.artifactId}-${project.version}</webapp.directory>
<webapp.directory>${project.basedir}/src/main/webapp</webapp.directory>
</systemPropertyVariables>
</configuration>
</plugin>
......@@ -390,14 +368,28 @@
<filtering>true</filtering>
<targetPath>WEB-INF</targetPath>
<includes>
<include>**/beans.xml</include>
<include>aggregator.properties</include>
</includes>
</resource>
</webResources>
<classifier>${aggregator.classifier}</classifier>
</configuration>
</plugin>
</plugin>
</plugins>
<resources>
<resource>
<directory>${basedir}/src/main/resources</directory>
<filtering>true</filtering>
</resource>
<resource>
<directory>${basedir}/src/main/webapp/WEB-INF</directory>
<filtering>true</filtering>
<includes>
<include>aggregator.properties</include>
</includes>
<targetPath>${project.build.directory}</targetPath>
</resource>
</resources>
</build>
<profiles>
<profile>
......@@ -405,8 +397,8 @@
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>tomcat-maven-plugin</artifactId>
<groupId>org.apache.tomcat.maven</groupId>
<artifactId>tomcat6-maven-plugin</artifactId>
<configuration>
<url>http://wrzh075.rzhousing.uni-wuerzburg.de:8180/manager</url>
<server>wrzh075</server>
......@@ -415,7 +407,8 @@
</plugins>
</build>
<properties>
<aggregator.endpoint.published>http://test1.digital-humanities.de/aggregator</aggregator.endpoint.published>
<aggregator.endpoint.published>http://test1.digital-humanities.de/services/aggregator</aggregator.endpoint.published>
<aggregator.classifier>wue</aggregator.classifier>
</properties>
</profile>
<profile>
......@@ -423,8 +416,8 @@
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>tomcat-maven-plugin</artifactId>
<groupId>org.apache.tomcat.maven</groupId>
<artifactId>tomcat6-maven-plugin</artifactId>
<configuration>
<url>http://services:8080/manager</url>
<server>services</server>
......@@ -434,8 +427,38 @@
</build>
<properties>
<aggregator.endpoint.published>https://textgridlab.org/1.0/aggregator</aggregator.endpoint.published>
<aggregator.classifier>services</aggregator.classifier>
</properties>
</profile>
<profile>
<id>tgbackend2</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.tomcat.maven</groupId>
<artifactId>tomcat6-maven-plugin</artifactId>
<configuration>
<url>http://tgbackend2.gwdg.de:8080/manager</url>
<server>tgbackend2</server>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<aggregator.endpoint.published>http://textgridrep.de/beta-es/services/aggregator</aggregator.endpoint.published>
<aggregator.textgridrep.default>http://tgbackend-2.gwdg.de/confserv</aggregator.textgridrep.default>
<aggregator.classifier>tgbackend2</aggregator.classifier>
</properties>
</profile>
<profile>
<id>esx1</id>
<properties>
<aggregator.endpoint.published>http://textgrid-esx1.gwdg.de/1.0/aggregator</aggregator.endpoint.published>
<aggregator.textgridrep.default>http://textgrid-esx1.gwdg.de/1.0/confserv</aggregator.textgridrep.default>
<aggregator.textgridrep.dev>https://textgridlab.org/dev/confserv</aggregator.textgridrep.dev>
<aggregator.classifier>esx1</aggregator.classifier>
</properties>
</profile>
<profile>
<id>authtests</id>
<activation>
......@@ -485,4 +508,18 @@
</organization>
<description>The TextGrid Aggregator is an experimental service that can walk aggregations (including editions and collections) and export the objects therein to various formats, e.g., TEI corpus, EPUB, and (experimentally) PDF.</description>
<inceptionYear>2012</inceptionYear>
<scm>
<url>https://projects.gwdg.de/projects/aggregator/repository</url>
<connection>scm:git:git://git.projects.gwdg.de/aggregator.git</connection>
<developerConnection>scm:git:ssh://git@git.projects.gwdg.de/aggregator.git</developerConnection>
</scm>
<issueManagement>
<system>Jira</system>
<url>https://pm.sub.uni-goettingen.de/</url>
</issueManagement>
<ciManagement>
<system>Jenkins</system>
<url>http://dev.digital-humanities.de/ci/job/aggregator-git-2/</url>
</ciManagement>
<url>https://projects.gwdg.de/projects/aggregator</url>
</project>
package info.textgrid.services.aggregator;
import info.textgrid.namespaces.metadata.core._2010.MetadataContainerType;
import info.textgrid.namespaces.metadata.core._2010.ObjectType;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.MetadataParseFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ObjectNotFoundFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ProtocolNotImplementedFault;
import info.textgrid.services.aggregator.ITextGridRep.TGOSupplier;
import info.textgrid.utils.export.filenames.DefaultFilenamePolicy;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.text.MessageFormat;
import java.util.Date;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.CacheControl;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Request;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.ResponseBuilder;
import javax.ws.rs.core.StreamingOutput;
import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.XMLGregorianCalendar;
import net.sf.saxon.s9api.SaxonApiException;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
/**
* The abstract base class for all aggregators/exporters. This tries to capture
* all behavior common to the various format specific exporters, and it tries to
* provide sensible defaults so code complexity for the implementers is reduced.
*
* <p>These are the typical steps:</p>
* <ol>
* <li>The exporter is instantiated using one of the constructors and configured
* for the specific request.</li>
* <li>The response (except for the content part) is built using
* {@link #createResponse()}.
* <p>
* This step should include the required steps to decide on the response type
* (success/error …), but it should return as fast as possible since the
* client/user will not get any feedback before.
* </p>
* </li>
* <li>The actual content should be generated and returned by the
* {@link #write(java.io.OutputStream)} method, which is called on-demand when
* the client tries to read the body part of the response.</li>
* </ol>
*
* Implementors will typically call at least the following configuration methods:
* <ul>
* <li>{@link #setMediaType(String)} to set the response's content type
* <li>{@link #setFileExtension(String)} to set the response's suggested filename extension
* </ul>
* Implementors will typically implement / extend the following methods:
* <ul>
* <li>implement {@link #write(java.io.OutputStream)} to generate the contents
* <li>extend {@link #createResponse()} to setup the response any further
* </ul>
*
*
* @author vitt
*/
public abstract class AbstractExporter implements StreamingOutput {
private static final Logger logger = Logger
.getLogger(AbstractExporter.class.getCanonicalName());
private Optional<String> sid = Optional.absent();
/**
 * Returns the optional {@code sid} value held by this exporter. The field
 * starts out as {@link Optional#absent()}.
 *
 * NOTE(review): presumably the TextGrid session ID that is handed to the
 * CRUD service when reading metadata -- confirm against the callers.
 *
 * @return the current {@code sid}, never {@code null} itself
 */
protected Optional<String> getSid() {
    return this.sid;
}
/**
 * Returns the kind of source this exporter currently works on.
 *
 * @return the current value of the {@code sourceType} field, which is
 *         {@link SourceType#UNKNOWN} until something else has been
 *         assigned
 */
public SourceType getSourceType() {
    return this.sourceType;
}
/**
 * Returns the root URIs this exporter has been set up with.
 *
 * <p>
 * Note that the internal array itself is returned, not a copy, so
 * modifications by the caller are visible to this exporter.
 * </p>
 *
 * @return the array stored in the {@code rootURIs} field
 */
public URI[] getRootURIs() {
    return this.rootURIs;
}
// detected and extracted
/**
 * The kinds of root input an exporter distinguishes.
 */
public enum SourceType {
    /** Initial value of the exporter's {@code sourceType} field. */
    UNKNOWN,
    /** NOTE(review): presumably a single XML document as root -- confirm. */
    XML,
    /** NOTE(review): presumably a single aggregation as root -- confirm. */
    AGGREGATION,
    /** Assigned by the constructor when more than one root URI was passed. */
    BASKET
}
/**
 * The disposition of an export result. The string form of each constant is
 * its lower-cased name, i.e. {@code "inline"} or {@code "attach"}.
 */
protected enum Disposition {
    INLINE,
    ATTACH;

    /**
     * Returns the constant's name converted to lower case using the
     * English locale, so the result does not depend on the default locale.
     */
    @Override
    public String toString() {
        final String constantName = name();
        return constantName.toLowerCase(Locale.ENGLISH);
    }
}
protected SourceType sourceType = SourceType.UNKNOWN;
protected final ITextGridRep repository;
protected final Request request;
protected final Stopwatch stopwatch;
private TGOSupplier<InputStream> content;
protected URI[] rootURIs;
private ObjectType[] rootObjects;
private Date lastModified;
private Boolean notModified = null;
private ResponseBuilder responseBuilder;
private String title;
private String fileExtension;
private String mediaType = MediaType.APPLICATION_OCTET_STREAM;
/**
 * Returns the title configured for this export.
 *
 * @return the value of the {@code title} field; {@code null} if no title
 *         has been assigned yet
 */
public String getTitle() {
    return this.title;
}
/**
 * Sets the title for this export.
 *
 * @param title
 *            the new title; stored as is, without validation
 */
public void setTitle(final String title) {
this.title = title;
}
/**
 * Returns the suggested filename extension for the response.
 *
 * @return the value of the {@code fileExtension} field; {@code null} if
 *         none has been assigned yet
 */
public String getFileExtension() {
    return this.fileExtension;
}
/**
 * Sets the response's suggested filename extension.
 *
 * @param fileExtension
 *            the new extension; stored as is, without validation
 */
public void setFileExtension(final String fileExtension) {
this.fileExtension = fileExtension;
}
/**
 * Returns the disposition configured for this export.
 *
 * @return the value of the {@code disposition} field, which is
 *         {@link Disposition#ATTACH} unless changed via
 *         {@link #setDisposition(Disposition)}
 */
public Disposition getDisposition() {
    return this.disposition;
}
/**
 * Sets the disposition for this export, replacing the default of
 * {@link Disposition#ATTACH}.
 *
 * @param disposition
 *            the new disposition; stored as is, without a null check
 */
public void setDisposition(final Disposition disposition) {
this.disposition = disposition;
}
private Disposition disposition = Disposition.ATTACH;
public AbstractExporter(final ITextGridRep repository,
final Request request, final String uriList) {
Preconditions.checkArgument(repository != null, "non-null repository argument required");
stopwatch = new Stopwatch();
stopwatch.start();
this.repository = repository;
this.request = request;
this.rootURIs = extractURIs(uriList);
if (rootURIs.length > 1)
this.sourceType = SourceType.BASKET;
}
/**
 * Splits a comma-separated list of URIs into its individual URIs.
 *
 * <p>
 * Whitespace around the individual entries is removed and empty entries
 * (as in {@code "a,,b"}, {@code ",a"} or an entirely empty string) are
 * skipped, so they can neither produce an empty URI nor inflate the number
 * of root URIs.
 * </p>
 *
 * @param uriList
 *            the URIs, separated by commas
 * @return the parsed URIs in the order of their appearance, at least one
 * @throws IllegalArgumentException
 *             if {@code uriList} is {@code null}, contains no non-empty
 *             entry, or contains an entry that is not a valid URI
 */
private static URI[] extractURIs(final String uriList) {
    Preconditions.checkArgument(uriList != null, "No URI list given");
    final String[] uriStrings = uriList.split(",");
    final URI[] collected = new URI[uriStrings.length];
    int count = 0;
    for (final String uriString : uriStrings) {
        final String trimmed = uriString.trim();
        if (!trimmed.isEmpty()) {
            collected[count++] = URI.create(trimmed);
        }
    }
    // String.split returns [""] for an empty input, so the check must be
    // on the number of real entries, not on the length of the split result
    Preconditions.checkArgument(count > 0, "No URI found in %s", uriList);
    if (count == collected.length) {
        return collected;
    }
    final URI[] uris = new URI[count];
    System.arraycopy(collected, 0, uris, 0, count);
    return uris;
}
/**
 * Returns the metadata records of all root objects, one per root URI.
 *
 * <p>
 * The records are fetched on the first successful call and kept for later
 * calls. For a basket, the metadata of every root URI is read from the
 * repository's CRUD service; otherwise the metadata delivered with
 * {@link #getContentSimple()} is used.
 * </p>
 *
 * @return the metadata records of the root objects
 */
protected ObjectType[] getRootObjects() throws MetadataParseFault,
        ObjectNotFoundFault, IoFault, AuthFault {
    if (rootObjects != null) {
        return rootObjects; // already collected by an earlier call
    }

    if (sourceType != SourceType.BASKET) {
        final ObjectType singleRoot = getContentSimple().getMetadata();
        rootObjects = new ObjectType[] { singleRoot };
        return rootObjects;
    }

    final ObjectType[] collected = new ObjectType[rootURIs.length];
    int index = 0;
    for (final URI rootURI : rootURIs) {
        final MetadataContainerType metadata = repository.getCRUDService()
                .readMetadata(sid.orNull(), null, rootURI.toString());
        collected[index++] = metadata.getObject();
    }
    // only publish the array once every record has been read successfully
    rootObjects = collected;

    final String message = MessageFormat.format(
            "Collected root objects for basket {0} after {1}",
            this, stopwatch.toString());
    logger.log(Level.INFO, message);
    return rootObjects;
}
/**
 * Returns one object, which may be a virtual one, that stands for the root
 * input of this exporter.
 *
 * <p>
 * This implementation picks {@link #getContentBasket()} if the source type
 * is {@link SourceType#BASKET} and {@link #getContentSimple()} in all
 * other cases.
 * </p>
 *
 * @throws IllegalStateException
 *             if not supported.
 */
protected TGOSupplier<InputStream> getContent() {
    final boolean isBasket = sourceType == SourceType.BASKET;
    return isBasket ? getContentBasket() : getContentSimple();
}
/**
* Constructs a single virtual object representing the root objects. Clients
* who need this functionality must override.
*
* @throws IllegalStateException
* if not supported.
*/
protected TGOSupplier<InputStream> getContentBasket() {
throw new IllegalStateException(