Commit d55afd10 authored by thorsten.vitt's avatar thorsten.vitt
Browse files

Merge branch 'release/1.4.1'

parents d97c0b21 3719aec1
#!/bin/bash
#
# Interactive helper for refreshing the HTML regression fixtures after a
# stylesheet update.
#
# Steps performed:
#   1. Run HtmlRegressionTest once and keep the maven log and surefire report.
#   2. Run it again with -Dregression.outputdir so the freshly generated HTML
#      is dumped to target/diffable-html/new-raw.
#   3. Canonicalize + pretty-print both the committed fixtures
#      (src/test/resources/*.html) and the new output so they can be diffed.
#   4. Offer a small menu to inspect the differences or accept the new files.
#
# Uses relative paths (target/..., src/test/resources), so it has to be
# started from the directory that contains them.
# External tools used: mvn, xmlstarlet, colordiff, meld, less, cmp, git.

set -u

readonly WORK_DIR=target/diffable-html
readonly FIXTURE_DIR=src/test/resources

#######################################
# Canonicalizes (without comments) and re-formats every *.html file of one
# directory into another directory, keeping the file names.
# Arguments: $1 - source directory, $2 - destination directory
#######################################
canonicalize_dir() {
  local src_dir=$1
  local dest_dir=$2
  local f
  for f in "$src_dir"/*.html; do
    # An unmatched glob stays literal; skip it instead of feeding it to xmlstarlet.
    [[ -e "$f" ]] || continue
    xmlstarlet c14n --without-comments "$f" | xmlstarlet fo > "$dest_dir/${f##*/}"
  done
}

#######################################
# Copies every newly generated raw HTML file whose canonicalized form differs
# from the committed one back into the fixture directory, shows git status
# and terminates the script. Expects the current directory to be WORK_DIR.
#######################################
accept_changes() {
  local f name
  for f in new-raw/*.html; do
    [[ -e "$f" ]] || continue
    name=${f##*/}
    # Files whose canonical forms are identical are skipped silently.
    if ! cmp -s "old/$name" "new/$name"; then
      cp -vp "$f" ../../src/test/resources
    fi
  done
  cd ../../ || exit 1
  git status
  echo You need to add and commit stuff now.
  exit 0
}

cat <<EOP
This debugging script assists in the progress that is neccessary after
stylesheet updates: It runs the html regression test to dump generated HTMLs to
the hard disk, generates canonicalized and formatted versions of them, and
allows you to compare the committed versions with the newly generated ones.
EOP

mkdir -p "$WORK_DIR"/{old,new,new-raw} || exit 1

echo "Running the regression test (may take a while) ..."
if mvn test -Dtest=HtmlRegressionTest > "$WORK_DIR/mvn.log" 2>&1; then
  echo ':-) Regression test succeeded -- you can probably ^C now ...'
else
  echo ':-( Test failed -- probably there are differences in the generated HTML'
fi
cp target/surefire-reports/info.textgrid.services.aggregator.html.HtmlRegressionTest.txt \
  "$WORK_DIR/HtmlRegressionTest.txt"
cat "$WORK_DIR/HtmlRegressionTest.txt"

echo "Generating the new HTMLs now (may take a while) ..."
# The exit status is deliberately ignored, as before: the files are still
# needed for the comparison when this run does not succeed.
mvn test -Dtest=HtmlRegressionTest "-Dregression.outputdir=$WORK_DIR/new-raw" \
  > "$WORK_DIR/mvn2.log" 2>&1

canonicalize_dir "$FIXTURE_DIR" "$WORK_DIR/old"
canonicalize_dir "$WORK_DIR/new-raw" "$WORK_DIR/new"

cat <<EOP
Canonicalized, formatted HTML test files have been generated in subdirectories
of target/diffable-html:
- old: As present in src/test/resources/*.html
- new: As generated from the current version of the stylesheet
- new-raw: Not canonicalized and formatted, i.e. ready for inclusion in the next commit.
EOP

# Everything below uses paths relative to the work directory, so a failed cd
# must stop the script instead of operating on the wrong tree.
cd "$WORK_DIR" || {
  printf 'Cannot change to %s\n' "$WORK_DIR" >&2
  exit 1
}

choice=0
while true; do
  cat <<EOP
What do you want to do?
(d) view the diffs via colordiff
(m) view the diffs via meld
(t) view the test report with xmldiff information
(l) view the maven log
(a) accept the changes, i.e. copy stuff to src/test/resources
(x) exit this tool
EOP
  # Leave the menu when stdin is closed instead of looping on an empty answer.
  if ! read -r -p"dmtax > " -n1 choice; then
    break
  fi
  # read -n1 returns without a line break; start the command output on a new line.
  echo
  case "$choice" in
    D|d)
      colordiff old new | less -R
      ;;
    M|m)
      meld old new
      ;;
    T|t)
      echo "Running less ..."
      less HtmlRegressionTest.txt
      ;;
    L|l)
      less mvn.log mvn2.log
      ;;
    A|a)
      accept_changes
      ;;
    X|x)
      break
      ;;
    *)
      # Unknown or empty input (e.g. plain Enter): just show the menu again.
      ;;
  esac
done
......@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>info.textgrid.services</groupId>
<artifactId>aggregator</artifactId>
<version>1.4.0</version>
<version>1.4.1</version>
<packaging>war</packaging>
<name>TextGrid Aggregator Service</name>
......@@ -23,7 +23,7 @@
<properties>
<commons-io-version>2.4</commons-io-version>
<link-rewriter-version>0.4.0-SNAPSHOT</link-rewriter-version>
<saxon-version>9.4.0.7</saxon-version> <!-- XXX mind dependency from epubcheck -->
<saxon-version>9.5.1-5</saxon-version> <!-- XXX mind dependency from epubcheck -->
<cxf-version>2.7.11</cxf-version>
<confclient-version>1.4.0</confclient-version>
<tgsearch-version>3.0.2-SNAPSHOT</tgsearch-version>
......@@ -79,6 +79,17 @@
</repository>
</repositories>
<distributionManagement>
<repository>
<id>internal</id>
<url>http://dev.digital-humanities.de/nexus/content/repositories/releases</url>
</repository>
<snapshotRepository>
<id>snapshots</id>
<url>http://dev.digital-humanities.de/nexus/content/repositories/snapshots</url>
</snapshotRepository>
</distributionManagement>
<dependencies>
......@@ -156,7 +167,7 @@
<dependency>
<groupId>org.idpf</groupId>
<artifactId>epubcheck</artifactId>
<version>4.0.0-alpha3</version>
<version>4.0.0-alpha11</version>
<scope>test</scope>
</dependency>
<dependency>
......@@ -290,6 +301,22 @@
</portNames>
</configuration>
</execution>
<execution>
<id>attach-config-file</id>
<phase>package</phase>
<goals>
<goal>attach-artifact</goal>
</goals>
<configuration>
<artifacts>
<artifact>
<file>target/aggregator.properties</file>
<type>properties</type>
<classifier>${aggregator.classifier}</classifier>
</artifact>
</artifacts>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
......@@ -459,6 +486,17 @@
<aggregator.classifier>esx1</aggregator.classifier>
</properties>
</profile>
<profile>
<id>lab</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<aggregator.endpoint.published>https://textgridlab.org/1.0/aggregator</aggregator.endpoint.published>
<aggregator.textgridrep.default>https://textgridlab.org/1.0/confserv</aggregator.textgridrep.default>
<aggregator.textgridrep.dev>https://textgridlab.org/dev/confserv</aggregator.textgridrep.dev>
</properties>
</profile>
<profile>
<id>authtests</id>
<activation>
......
......@@ -61,12 +61,12 @@ public class GenericExceptionMapper implements ExceptionMapper<Exception> {
public Response toResponse(final Exception exception) {
Status status;
String message;
if (exception instanceof WebApplicationException && ((WebApplicationException) exception).getResponse() != null && ((WebApplicationException) exception).getResponse().getStatus() != 500) {
if (exception instanceof WebApplicationException && ((WebApplicationException) exception).getResponse() != null /* && ((WebApplicationException) exception).getResponse().getStatus() != 500 */) {
final WebApplicationException wae = (WebApplicationException) exception;
status = Status.fromStatusCode(wae.getResponse().getStatus());
final Object entity = wae.getResponse().getEntity();
if (entity != null)
message = entity.toString();
return wae.getResponse(); // message = entity.toString();
else
message = status.toString();
} else if (exception instanceof WebApplicationException
......
package info.textgrid.services.aggregator;
import info.textgrid.services.aggregator.util.StylesheetManager;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput;
import javax.ws.rs.core.UriBuilder;
import javax.ws.rs.core.UriInfo;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XsltTransformer;
import com.google.common.base.Optional;
import com.google.common.net.HostAndPort;
/**
 * Streams an HTML help page for the service.
 *
 * <p>
 * The page is produced by fetching the service's WADL (the request URL with
 * its query replaced by <code>_wadl</code>) and transforming it with the
 * bundled <code>wadl_documentation-2009-02.xsl</code> stylesheet. The WADL
 * location is derived from the request information supplied through
 * {@link #uriInfo(UriInfo)} and {@link #headers(HttpHeaders)}.
 * </p>
 */
public class Help implements StreamingOutput {

	private final StylesheetManager stylesheetManager;
	private UriInfo uriInfo;
	private HttpHeaders headers;

	/**
	 * @param stylesheetManager
	 *            used to load the documentation stylesheet and to create the
	 *            serializer for the output
	 */
	public Help(final StylesheetManager stylesheetManager) {
		this.stylesheetManager = stylesheetManager;
	}

	/**
	 * Transforms the WADL to HTML and writes the result to the given stream.
	 *
	 * @param output
	 *            the stream the generated HTML is serialized to
	 * @throws IOException
	 *             if reading the WADL or the stylesheet fails
	 * @throws WebApplicationException
	 *             if no WADL URL could be determined, if the transformation
	 *             fails, or (with status 404) if the WADL could not be found
	 */
	@Override
	public void write(final OutputStream output) throws IOException,
			WebApplicationException {
		try {
			final XsltTransformer transformer = stylesheetManager
					.getStylesheet(
							URI.create("/WEB-INF/stylesheets/wadl_documentation-2009-02.xsl"),
							Optional.<String> absent(), false, false).load();
			final Serializer serializer = stylesheetManager.xsltProcessor
					.newSerializer(output);
			transformer.setDestination(serializer);

			// Resolve the URL only once: every resolution probes the
			// candidate URLs over HTTP.
			final URL wadlURL = getWadlURL();
			final StreamSource wadlSource = new StreamSource(
					wadlURL.openStream(), wadlURL.toString());
			try {
				transformer.setSource(wadlSource);
				transformer.setParameter(new QName("apptitle"),
						new XdmAtomicValue("TextGrid Aggregator Service"));
				transformer.transform();
			} finally {
				closeQuietly(wadlSource);
			}
		} catch (final SaxonApiException e) {
			throw new WebApplicationException(e);
		} catch (final FileNotFoundException e) {
			throw new WebApplicationException(GenericExceptionMapper
					.toResponse(
							Status.NOT_FOUND,
							"There still was a not found error: "
									+ e.getMessage(), describeRequest())
					.build());
		}
	}

	/**
	 * Sets the URI information of the current request.
	 *
	 * @return this object, to allow call chaining
	 */
	public Help uriInfo(UriInfo uriInfo) {
		this.uriInfo = uriInfo;
		return this;
	}

	/**
	 * Sets the HTTP headers of the current request.
	 *
	 * @return this object, to allow call chaining
	 */
	public Help headers(HttpHeaders headers) {
		this.headers = headers;
		return this;
	}

	/**
	 * Determines the URL of the service's WADL. The following candidates are
	 * tried in order, and the first one that answers with HTTP 200 wins:
	 * <ol>
	 * <li>the first value of the <code>X-Original-Request-URI</code> request
	 * header, if present,</li>
	 * <li>the absolute path reported by the {@link UriInfo},</li>
	 * <li>the absolute path with host, port and scheme replaced by the values
	 * from the <code>Host</code> request header.</li>
	 * </ol>
	 *
	 * @return the WADL URL, never <code>null</code>
	 * @throws WebApplicationException
	 *             with status 400 if none of the candidates worked, or
	 *             wrapping the I/O error if probing a candidate failed
	 */
	public URL getWadlURL() {
		if (headers != null) {
			final List<String> originalUri = headers
					.getRequestHeader("X-Original-Request-URI");
			if (originalUri != null && !originalUri.isEmpty()) {
				final URL wadlURL = getWadlURL(originalUri.get(0));
				if (wadlURL != null)
					return wadlURL;
			}
		}
		if (uriInfo != null) {
			URL wadlURL = getWadlURL(uriInfo.getAbsolutePath().toString());
			if (wadlURL != null)
				return wadlURL;

			if (headers != null) {
				// Somehow uriInfo fails to recognize the port from the Host
				// header, but only on the production machines. This is a
				// workaround.
				final List<String> hostHeader = headers
						.getRequestHeader("Host");
				if (hostHeader != null && !hostHeader.isEmpty()) {
					final HostAndPort host = HostAndPort.fromString(
							hostHeader.get(0)).withDefaultPort(80);
					wadlURL = getWadlURL(uriInfo.getAbsolutePathBuilder()
							.host(host.getHostText())
							.port(host.getPort())
							.scheme(host.getPort() == 443 ? "https" : "http")
							.build().toString());
					if (wadlURL != null)
						return wadlURL;
				}
			}
		}
		throw new WebApplicationException(GenericExceptionMapper.toResponse(
				Status.BAD_REQUEST,
				"Unable to determine a valid path to the WADL.",
				describeRequest()).build());
	}

	/**
	 * Builds the WADL URL for the given request URI by replacing its query
	 * with <code>_wadl</code>, and probes it with an HTTP request.
	 *
	 * NOTE(review): the argument may originate from a request header, so this
	 * makes the server connect to a client-influenced URL. Consider
	 * restricting the accepted hosts.
	 *
	 * @param string
	 *            the request URI to derive the WADL URL from
	 * @return the WADL URL if it answered with status 200; <code>null</code>
	 *         if it answered with a different status, if the argument is not
	 *         a valid absolute URI, or if it is not an HTTP(S) URL
	 * @throws WebApplicationException
	 *             wrapping the error if the URL is malformed or the
	 *             connection fails
	 */
	private URL getWadlURL(String string) {
		HttpURLConnection http = null;
		try {
			final URI resolved = UriBuilder.fromUri(string)
					.replaceQuery("_wadl").build();
			final URL url = resolved.toURL();
			final URLConnection connection = url.openConnection();
			if (!(connection instanceof HttpURLConnection))
				return null; // a status code only exists for HTTP(S)
			http = (HttpURLConnection) connection;
			http.connect();
			return http.getResponseCode() == 200 ? url : null;
		} catch (final IllegalArgumentException e) {
			// Invalid or relative URI (e.g. a header that only carries a
			// path): not usable, let the caller try the next candidate.
			return null;
		} catch (final MalformedURLException e) {
			throw new WebApplicationException(e);
		} catch (final IOException e) {
			throw new WebApplicationException(e);
		} finally {
			// The probe connection is not reused; release it.
			if (http != null)
				http.disconnect();
		}
	}

	/**
	 * Describes the current request for diagnostic messages. Safe to call
	 * when no {@link UriInfo} has been set.
	 */
	private String describeRequest() {
		if (uriInfo == null)
			return "No request URI information available.";
		return "Path: " + uriInfo.getPath() + "\nBase URI: "
				+ uriInfo.getBaseUri().toString() + "\nAbsolute Path: "
				+ uriInfo.getAbsolutePath() + "\nRequest URI: "
				+ uriInfo.getRequestUri().toString();
	}

	/**
	 * Closes the input stream behind the given source, ignoring errors: the
	 * stream was only read from, so a failing close cannot lose data and must
	 * not hide an exception raised by the transformation itself.
	 */
	private static void closeQuietly(final StreamSource source) {
		try {
			if (source.getInputStream() != null)
				source.getInputStream().close();
		} catch (final IOException e) {
			// intentionally ignored, see method comment
		}
	}
}
\ No newline at end of file
......@@ -27,9 +27,11 @@
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Request;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import javax.ws.rs.core.UriInfo;
import net.sf.saxon.s9api.SaxonApiException;
......@@ -39,12 +41,14 @@
import com.google.common.base.Optional;
@Description(title = "TextGrid Aggregator Service",
value = "The Aggregator is a service to export and convert TextGrid documents. It is able to recursively process collections, editions, and other TextGrid aggregations.")
public class REST {
private final ITextGridRep repository;
private StylesheetManager stylesheetManager;
private StylesheetManager getStylesheetManager() {
if (stylesheetManager == null)
stylesheetManager = new StylesheetManager(servlet, repository);
......@@ -54,7 +58,6 @@ private StylesheetManager getStylesheetManager() {
@Context
private ServletContext servlet;
public REST(final ITextGridRep repository) {
this.repository = repository;
}
......@@ -63,18 +66,20 @@ public REST(final ITextGridRep repository) {
@Path("/teicorpus/{uris}")
@Produces("application/tei+xml")
@Descriptions({
@Description(target=DocTarget.METHOD, value="Creates a TEI corpus of all the TEI documents (recursively) aggregated by the given aggregation"),
@Description(target=DocTarget.RETURN, value="TEI corpus document")
})
public Response getCorpus(@Description("TextGrid URIs of the root objects, separated by commas") @PathParam("uris") final String uriList,
@Description(target = DocTarget.METHOD, value = "Creates a TEI corpus of all the TEI documents (recursively) aggregated by the given aggregation"),
@Description(target = DocTarget.RETURN, value = "TEI corpus document") })
public Response getCorpus(
@Description("TextGrid URIs of the root objects, separated by commas") @PathParam("uris") final String uriList,
@Description("Whether to generate a Content-Disposition: attachment header") @QueryParam("attach") @DefaultValue("true") final boolean attach,
@Description("If true, no intermediate TEI corpus documents will be generated for intermediate aggregations, hierarchical structure will be lost") @QueryParam("flat") @DefaultValue("false") final boolean flat,
@Description("Title for the container if multiple root objects are given") @QueryParam("title") final String titleArgument,
@Description("Session id for accessing restricted resources") @QueryParam("sid") final String sid,
@Context final Request request)
throws URISyntaxException, ObjectNotFoundFault, MetadataParseFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException {
@Context final Request request) throws URISyntaxException,
ObjectNotFoundFault, MetadataParseFault, IoFault, AuthFault,
ProtocolNotImplementedFault, IOException, SaxonApiException {
final TEICorpusExporter exporter = new TEICorpusExporter(repository, request, uriList);
final TEICorpusExporter exporter = new TEICorpusExporter(repository,
request, uriList);
exporter.setFlat(flat);
exporter.setTitle(titleArgument);
exporter.sid(sid);
......@@ -82,22 +87,22 @@ public Response getCorpus(@Description("TextGrid URIs of the root objects, separ
return exporter.createResponse().build();
}
@GET
@Path(value = "/epub/{object}")
@Produces(value = "application/epub+zip")
@Description("Converts the given TEI object or the aggregation of TEI objects to an E-Book in EPUB format")
public Response getEPUB(
@Description("The TextGrid URI(s) of the object(s) to convert, separated by commas. Should be either TEI objects or aggregations of TEI (and maybe other) objects")
@PathParam("object") final String uriList,
@Description("The TextGrid URI(s) of the object(s) to convert, separated by commas. Should be either TEI objects or aggregations of TEI (and maybe other) objects") @PathParam("object") final String uriList,
@Description("URL of an alternative stylesheet to use. Must be compatible.") @QueryParam("stylesheet") final URI xsluri,
@Description("Title if multiple root objects given") @QueryParam("title") final String titleParam,
@Description("Session ID for accessing protected objects") @QueryParam("sid") final String sid,
@Context final Request request)
throws ObjectNotFoundFault, MetadataParseFault, IoFault, AuthFault,
ProtocolNotImplementedFault, IOException, SaxonApiException {
@Context final Request request) throws ObjectNotFoundFault,
MetadataParseFault, IoFault, AuthFault,
ProtocolNotImplementedFault, IOException, SaxonApiException {
final EPUBSerializer serializer = new EPUBSerializer(repository, getStylesheetManager(), uriList, Optional.fromNullable(sid), request);
final EPUBSerializer serializer = new EPUBSerializer(repository,
getStylesheetManager(), uriList, Optional.fromNullable(sid),
request);
serializer.setStylesheet(xsluri);
serializer.setTitle(titleParam);
......@@ -107,66 +112,71 @@ public Response getEPUB(
@GET
@Path(value = "/html/{object}")
@Produces(value = "text/html")
@Descriptions({
@Description(target = DocTarget.METHOD, value = "Generates HTML output. This is typically fast, and it is also used at textgridrep.de.", title = "HTML generator"),
@Description(target = DocTarget.RETURN, value = "Either an XHTML document (expect HTML5 elements), or a XHTML fragment containing only the body, if the embedded parameter is true.")
})
public Response getHTML(
@Description("The TextGrid URIs of the TEI document(s) or aggregation(s) to transform, separated by commas") @PathParam("object") final String uriList,
@Description("If given, an alternative XSLT stylesheet to use") @QueryParam("stylesheet") final URI xsluri,
@Description("If true, check for an <?xsl-stylesheet?> processing instruction in the document to render") @QueryParam("pi") final boolean pi,
@Description("If given, an alternative XSLT stylesheet to use. Must be a textgrid URI.") @QueryParam("stylesheet") final URI xsluri,
@Description("If true, check for an <?xsl-stylesheet?> processing instruction in the document to render. Only textgrid: URIs will be resolved.") @QueryParam("pi") final boolean pi,
@Description("If true and a stylesheet has been given, force reloading the stylesheet, do not cache") @QueryParam("refreshStylesheet") final boolean refreshStylesheet,
@Description("Session ID to access protected resources") @QueryParam("sid") final String sid,
@Description("If true, pass the information the stylesheet that its result will be embedded into some website") @QueryParam("embedded") final boolean embedded,
@Description("If true, an HTML fragment consisting of a <div class='body'> element containing the contents of the HTML <body> will be returned, ready to be embedded in an existing HTML page") @QueryParam("embedded") final boolean embedded,
@Description("URL of the CSS that should be referenced in the HTML that is created") @QueryParam("css") final URI css,
@Description("The requested content type. E.g., text/html or text/xml") @QueryParam("mediatype") final String mediaType,
@Description("An XML ID. If given, only this element will be transformed.") @QueryParam("id") final String id,
@Description("If true, a full webpage that looks similar to textgridrep.de's browse view will be returned") @QueryParam("simulate") @DefaultValue("false") final boolean simulate,
@Context final Request request) throws ObjectNotFoundFault,
MetadataParseFault, IoFault, AuthFault,
ProtocolNotImplementedFault, WebApplicationException, IOException,
SaxonApiException, ExecutionException {
final HTMLWriter writer = new HTMLWriter(repository, getStylesheetManager(), uriList, xsluri,
refreshStylesheet, pi, embedded, css, sid, mediaType, id, request);
final HTMLWriter writer = new HTMLWriter(repository,
getStylesheetManager(), uriList, xsluri, refreshStylesheet, pi,
embedded, css, sid, mediaType, id, request);
writer.simulate(simulate);
return writer.createResponse().build();
}
@GET
@Path(value = "/zip/{objects}")
@Produces("application/zip")
@Descriptions({
@Description(target=DocTarget.METHOD, value="Creates a ZIP containing the specified objects and everything that has been aggregated by them, "
+ "optionally transformed and filtered. Links within supported XML documents will be rewritten to relative URLs if the target document "
+ "is also packed into this ZIP. This method may take quite a while depending on the number of objects, and it will not start "
+ "returning something until the metadata for everything that will be exported has been collected, so increase your timeouts ..."),
@Description(target=DocTarget.RETURN, value="The ZIP file returned will usually contain a content document plus a sidecar .meta file for each "
+ "exported object. The .meta file contains the raw metadata according to the TextGrid metadata schema. For aggregations (editions, "
+ "collections), we will typically create both a directory and an ORE content file containing the list of items. "
+ "Additionally, a file called /.INDEX.imex will be included in the archive. This file contains the list of exported objects "
+ "together with the local filename, the name of the local .meta file, the link rewriting method used, and the original TextGrid URI. "
+ "You can use this file to re-import the whole set of files using the TextGridLab. See the Link Rewriter Library's documentation "
+ "for an XML schema and description of the format. \n\nThe ZIP file may contain ZIP comments refering to warning or informational "
+ "messages during export.")
})
public Response getZIP(
@Description("The TextGridURIs of the TEI documents or aggregations to zip, separated by commas (,)")
@PathParam("objects") final String uriList,
@Description("Session ID to access protected resources")
@QueryParam("sid") final String sid,
@Description("(optional) title for the exported data, currently only used for generating the filename. If none is given, the first title of the first object will be used.")
@QueryParam("title") final String title,
@QueryParam("filenames")
@DefaultValue("{parent|/}{author}-{title}*.{ext}")
@Description("Pattern for the generated filenames in the ZIP files.") final
String filenames,
@QueryParam("metanames")
@DefaultValue("{filename}.meta")
@Description("Pattern for the filenames for the metadata files in the ZIP files.") final
String metanames,
@QueryParam("dirnames")
@DefaultValue("{parent|/}{title}*")
@Description("Pattern for the directory names generated for aggregations etc. This pattern applied to the parent aggregation is available as {parent} in filenames and metanames.") final
String dirnames,
@QueryParam("only")
@Description("Restrict export to objects with the given MIME types")
final List<String> only,
@QueryParam("meta")
@Description("Export metadata and aggregation files")
@DefaultValue("true")
final boolean meta,
@QueryParam("transform")
@Description("Transform XML documents")
final String transform,
@Context final Request request) throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException {
final ZipResult zipResult = new ZipResult(repository, getStylesheetManager(), request, uriList, filenames, metanames, dirnames, only, meta, transform);
@Description("The TextGridURIs of the TEI documents or aggregations to zip, separated by commas (,)") @PathParam("objects") final String uriList,
@Description("Session ID to access protected resources") @QueryParam("sid") final String sid,
@Description("(optional) title for the exported data, currently only used for generating the filename. If none is given, the first title of the first object will be used.") @QueryParam("title") final String title,
@QueryParam("filenames") @DefaultValue("{parent|/}{author}-{title}*.{ext}") @Description("Pattern for the generated filenames in the ZIP files.") final String filenames,
@QueryParam("metanames") @DefaultValue("{filename}.meta") @Description("Pattern for the filenames for the metadata files in the ZIP files.") final String metanames,
@QueryParam("dirnames") @DefaultValue("{parent|/}{title}*") @Description("Pattern for the directory names generated for aggregations etc. This pattern applied to the parent aggregation is available as {parent} in filenames and metanames.") final String dirnames,
@QueryParam("only") @Description("If at least one only parameter is given, restrict export to objects with the given MIME types") final List<String> only,
@QueryParam("meta") @Description("Include metadata and aggregation files in the ZIP file.") @DefaultValue("true") final boolean meta,
@QueryParam("transform") @Description("(EXPERIMENTAL) Transform each XML document before zipping. Values currently available are text, html, or the textgrid: URI of an XSLT stylesheet.") final String transform,
@Context final Request request) throws MetadataParseFault,
ObjectNotFoundFault, IoFault, AuthFault,
ProtocolNotImplementedFault, IOException, SaxonApiException {
final ZipResult zipResult = new ZipResult(repository,
getStylesheetManager(), request, uriList, filenames, metanames,
dirnames, only, meta, transform);
if (title != null)
zipResult.setTitle(title);
if (sid != null)
......@@ -177,24 +187,35 @@ public Response getZIP(
@GET
@Path(value = "/pdf/{object}")
@Produces("application/pdf")
public Response getPDF(
@PathParam("object") final URI uri,
@QueryParam("sid") final String sid,
@Context final Request request
) throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException {
final PDF pdf = new PDF(repository, getStylesheetManager(), request, uri);
@Description("(BROKEN) the PDF export will currently not work on any installed instance of the service.")
public Response getPDF(@PathParam("object") final URI uri,
@QueryParam("sid") final String sid, @Context final Request request)
throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault,
ProtocolNotImplementedFault, IOException, SaxonApiException {
final PDF pdf = new PDF(repository, getStylesheetManager(), request,
uri);
pdf.sid(sid);
return pdf.createResponse().build();
}
@GET
@Path(value = "/version")
@Produces("text/html")
public StreamingOutput getVersion() {
final Version version = new Version(repository, getStylesheetManager());
@Description("Produces an HTML page containing version and configuration information for the service instance.")
public StreamingOutput getVersion(@Context HttpHeaders headers) {
final Version version = new Version(repository, getStylesheetManager()).headers(headers);
return version.get();
}
/**
 * Serves the help page of the service.
 *
 * @param uriInfo
 *            URI information of the current request, handed on to the
 *            {@link Help} object
 * @param headers
 *            HTTP headers of the current request, handed on to the
 *            {@link Help} object
 * @return the {@link Help} object, which writes the page when it is streamed
 */
@GET
@Path(value = "/help")
@Produces("text/html")
@Description("Returns an auto-generated help page summarizing all available arguments.")
public StreamingOutput getHelp(@Context UriInfo uriInfo, @Context HttpHeaders headers) throws SaxonApiException, IOException {
	final Help helpPage = new Help(getStylesheetManager());
	helpPage.uriInfo(uriInfo);
	helpPage.headers(headers);
	return helpPage;
}
}