Commit 10fb28ce authored by thorsten.vitt
Browse files

Merge branch 'feature/zip-config' into develop

parents e6733a14 5a1f7b6c
......@@ -30,7 +30,11 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/webapp/WEB-INF">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
......
......@@ -22,7 +22,7 @@
<properties>
<commons-io-version>2.4</commons-io-version>
<link-rewriter-version>0.3.1-SNAPSHOT</link-rewriter-version>
<link-rewriter-version>0.3.4-SNAPSHOT</link-rewriter-version>
<saxon-version>9.4.0.7</saxon-version>
<cxf-version>2.7.7</cxf-version>
<confclient-version>1.0-SNAPSHOT</confclient-version>
......
......@@ -15,6 +15,7 @@
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.concurrent.ExecutionException;
import javax.servlet.ServletContext;
......@@ -137,9 +138,35 @@ public Response getZIP(
@QueryParam("sid") final String sid,
@Description("(optional) title for the exported data, currently only used for generating the filename. If none is given, the first title of the first object will be used.")
@QueryParam("title") final String title,
@QueryParam("filenames")
@DefaultValue("{parent|/}{author}-{title}*.{ext}")
@Description("Pattern for the generated filenames in the ZIP files.") final
String filenames,
@QueryParam("metanames")
@DefaultValue("{filename}.meta")
@Description("Pattern for the filenames for the metadata files in the ZIP files.") final
String metanames,
@QueryParam("dirnames")
@DefaultValue("{parent|/}{title}*")
@Description("Pattern for the directory names generated for aggregations etc. This pattern applied to the parent aggregation is available as {parent} in filenames and metanames.") final
String dirnames,
@QueryParam("only")
@Description("Restrict export to objects with the given MIME types")
final List<String> only,
@QueryParam("meta")
@Description("Export metadata and aggregation files")
@DefaultValue("true")
final boolean meta,
@QueryParam("transform")
@Description("Transform XML documents")
final String transform,
@Context final Request request) throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException {
final ZipResult zipResult = new ZipResult(repository, request, uriList);
final ZipResult zipResult = new ZipResult(repository, getStylesheetManager(), request, uriList, filenames, metanames, dirnames, only, meta, transform);
if (title != null)
zipResult.setTitle(title);
if (sid != null)
......
package info.textgrid.services.aggregator.tree;
import info.textgrid.namespaces.metadata.core._2010.GeneratedType.Pid;
import info.textgrid.namespaces.metadata.core._2010.GeneratedType.TextgridUri;
import info.textgrid.namespaces.metadata.core._2010.ObjectType;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
......@@ -10,9 +12,14 @@
import info.textgrid.utils.export.aggregations.AggregationEntry;
import java.util.Deque;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.ibm.icu.text.MessageFormat;
......@@ -31,12 +38,48 @@ protected AggregationTreeFactory(final ObjectType root, final ITextGridRep repos
walkAggregation(root, false);
}
private static final Pattern INTERNAL_LINK = Pattern.compile("^(textgrid|hdl):([^.]*)(\\.(\\d+))?$");
/**
 * Returns <code>true</code> iff link is a link that probably refers to target.
 *
 * <ul>
 * <li>A <code>textgrid:</code> link matches if it equals the target's
 * TextGrid URI, or if it carries no revision suffix and its base part
 * equals the base part of the target's TextGrid URI.</li>
 * <li>A <code>hdl:</code> link matches if it equals one of the target's
 * PIDs.</li>
 * </ul>
 *
 * @param link
 *            the link to check; <code>null</code> never refers to anything
 * @param target
 *            the object the link might refer to
 * @return <code>true</code> if the link refers to the target according to
 *         the rules above, <code>false</code> otherwise (including links
 *         that do not look like internal links at all)
 */
static boolean refersTo(final String link, final ObjectType target) {
	if (link == null) {
		// Pattern#matcher(null) would throw a NullPointerException; a
		// missing link simply refers to nothing.
		return false;
	}
	final Matcher matcher = INTERNAL_LINK.matcher(link);
	if (!matcher.matches()) {
		// The message is run through java.text.MessageFormat because it
		// contains a {0} placeholder, so it must not contain a bare
		// apostrophe (it would be swallowed as a quote character).
		logger.log(Level.WARNING, "Internal link {0} does not match the pattern for internal links", link);
		return false;
	}
	final String scheme = matcher.group(1);
	if ("textgrid".equals(scheme)) {
		final String targetURI = target.getGeneric().getGenerated().getTextgridUri().getValue();
		if (link.equals(targetURI))
			return true;
		if (matcher.group(3) == null) { // generic URI, i.e. no revision suffix
			final Matcher targetMatcher = INTERNAL_LINK.matcher(targetURI);
			return targetMatcher.matches() && targetMatcher.group(2).equals(matcher.group(2));
		}
	} else if ("hdl".equals(scheme)) {
		final List<Pid> pids = target.getGeneric().getGenerated().getPid();
		return Iterables.any(pids, new Predicate<Pid>() {
			@Override
			public boolean apply(final Pid input) {
				return link.equals(input.getValue());
			}
		});
	}
	// textgrid link with an explicit, non-matching revision
	return false;
}
/**
 * Handles a single object while walking the tree. If the superclass
 * implementation reports the object as handled, nothing else is done here.
 * Otherwise an {@link AggregationEntry} for the object is created below the
 * aggregation currently on top of the stack. If that parent carries edition
 * metadata whose isEditionOf link refers to the object (see
 * {@link #refersTo(String, ObjectType)}), the new entry is additionally
 * registered as the parent's work.
 *
 * @return always <code>true</code>
 */
@Override
protected boolean walk(final ObjectType object, final boolean again) {
if (super.walk(object, again))
return true; // Aggregation already handled via #walkAggregation
else {
// NOTE(review): the next statement looks like the pre-merge version of the
// two statements that follow it (it builds an entry for the same object and
// parent and discards the reference) -- confirm whether it should be removed.
new AggregationEntry(object, stack.peek());
Aggregation parent = stack.peek();
AggregationEntry entry = new AggregationEntry(object, parent);
// Only parents with edition metadata can point to a work.
if (parent.getMetadata().getEdition() != null &&
refersTo(parent.getMetadata().getEdition().getIsEditionOf(), object))
parent.setWork(entry);
return true;
}
}
......@@ -55,7 +98,7 @@ protected void walkAggregation(final ObjectType aggregation,
final ObjectType workObject = getRepository()
.getCRUDService()
.readMetadata(getSid(), "", workURI).getObject();
walkAggregation(workObject, again);
walk(workObject, again);
} catch (final MetadataParseFault e) {
logger.log(
Level.WARNING,
......
......@@ -10,11 +10,11 @@
import info.textgrid.services.aggregator.AbstractExporter;
import info.textgrid.services.aggregator.ITextGridRep;
import info.textgrid.services.aggregator.tree.AggregationTreeFactory;
import info.textgrid.services.aggregator.util.StylesheetManager;
import info.textgrid.utils.export.aggregations.AggregationEntry;
import info.textgrid.utils.export.aggregations.IAggregation;
import info.textgrid.utils.export.aggregations.IAggregationEntry;
import info.textgrid.utils.export.filenames.DefaultFilenamePolicy;
import info.textgrid.utils.export.filenames.DefaultMetaFilenamePolicy;
import info.textgrid.utils.export.filenames.ConfigurableFilenamePolicy;
import info.textgrid.utils.export.filenames.IFilenamePolicy;
import info.textgrid.utils.linkrewriter.ConfigurableXMLRewriter;
import info.textgrid.utils.linkrewriter.ImportMapping;
......@@ -24,6 +24,7 @@
import java.io.OutputStream;
import java.net.URI;
import java.text.MessageFormat;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
......@@ -37,6 +38,11 @@
import javax.ws.rs.core.StreamingOutput;
import javax.xml.bind.JAXB;
import javax.xml.stream.XMLStreamException;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XsltExecutable;
import net.sf.saxon.s9api.XsltTransformer;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
......@@ -48,16 +54,22 @@
import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams;
import com.google.common.io.FileBackedOutputStream;
import com.google.common.net.MediaType;
public class ZipResult extends AbstractExporter implements StreamingOutput {
private final IFilenamePolicy policy;
private final ConfigurableFilenamePolicy policy;
private final IFilenamePolicy metaPolicy;
private ImportMapping mapping;
private final static Logger logger = Logger.getLogger(ZipResult.class
.getCanonicalName());
private final Set<URI> written = Sets.newHashSet();
private boolean onlySomeFormats;
private List<MediaType> onlyFormats;
private boolean includeMeta;
private StylesheetManager stylesheetManager;
private Optional<Transformation> transformation = Optional.absent();
private static final Function<ObjectType, String> GetURI = new Function<ObjectType, String>() {
......@@ -69,13 +81,70 @@ public String apply(final ObjectType input) {
};
/**
 * Creates a ZIP exporter.
 *
 * @param repository
 *            passed on to the superclass
 * @param stylesheetManager
 *            kept for loading the stylesheet of a configured transformation
 * @param request
 *            passed on to the superclass
 * @param uriList
 *            passed on to the superclass
 * @param filenames
 *            pattern for the data file names; may refer to the directory
 *            pattern as <code>parent</code>
 * @param metanames
 *            pattern for the metadata file names; may refer to the
 *            directory pattern as <code>parent</code> and to the data file
 *            name as <code>filename</code>
 * @param dirnames
 *            pattern for directory names
 * @param only
 *            MIME types to restrict the export to; <code>null</code> or
 *            empty means no restriction
 * @param includeMeta
 *            whether metadata files are written
 * @param transform
 *            name of a built-in transformation or a textgrid:/hdl:
 *            stylesheet URI; <code>null</code> or empty for none
 * @throws IllegalArgumentException
 *             if an entry of <code>only</code> is not a parsable media type
 */
public ZipResult(final ITextGridRep repository,
		StylesheetManager stylesheetManager, final Request request,
		final String uriList, final String filenames,
		final String metanames, final String dirnames,
		final List<String> only, final boolean includeMeta, String transform) {
	super(repository, request, uriList);

	final ConfigurableFilenamePolicy parentPolicy = ConfigurableFilenamePolicy
			.builder(dirnames).isParent().build();
	this.policy = ConfigurableFilenamePolicy.builder(filenames)
			.subPolicy("parent", parentPolicy).build();
	this.metaPolicy = ConfigurableFilenamePolicy.builder(metanames)
			.subPolicy("parent", parentPolicy)
			.subPolicy("filename", policy).build();

	if (only != null && !only.isEmpty()) {
		this.onlySomeFormats = true;
		// Copy eagerly: Lists.transform only returns a lazy view that would
		// re-parse every entry on each iteration and would report invalid
		// media types only once the export is already running.
		this.onlyFormats = Lists.newArrayList(Lists.transform(only,
				new Function<String, MediaType>() {
					@Override
					public MediaType apply(final String input) {
						return MediaType.parse(input);
					}
				}));
	} else {
		// No restriction requested. Must not touch 'only' here, it may be null.
		this.onlySomeFormats = false;
		this.onlyFormats = Lists.newArrayList();
	}

	setFileExtension("zip");
	setMediaType("application/zip");
	this.includeMeta = includeMeta;
	this.stylesheetManager = stylesheetManager;
	setupTransform(transform);
}
/**
 * Immutable value holder describing one transformation: the format of its
 * result and the location of the stylesheet that produces it.
 */
private static class Transformation {

	/** Format of the transformation result, e.g. <code>text/plain</code>. */
	public final String targetFormat;

	/** Location of the stylesheet, parsed from the string given on construction. */
	public final URI stylesheet;

	/**
	 * @param targetFormat
	 *            format of the transformation result
	 * @param stylesheet
	 *            stylesheet location; must be a syntactically valid URI
	 *            reference, otherwise {@link URI#create(String)} throws an
	 *            {@link IllegalArgumentException}
	 */
	public Transformation(String targetFormat, String stylesheet) {
		final URI stylesheetURI = URI.create(stylesheet);
		this.targetFormat = targetFormat;
		this.stylesheet = stylesheetURI;
	}
}
/**
 * Transformations that can be selected by a short name instead of a
 * stylesheet URI. Keys are the accepted names; the stylesheet paths point
 * below <code>/WEB-INF</code>.
 */
private static final ImmutableMap<String, Transformation> BUILTIN_TRANSFORMATIONS = ImmutableMap
		.<String, Transformation> builder()
		.put("text",
				new Transformation("text/plain",
						"/WEB-INF/tei-stylesheets/txt/tei-to-text.xsl"))
		.put("html",
				new Transformation("text/html",
						"/WEB-INF/stylesheets/db2xhtml.xsl")).build();
/**
 * Configures the transformation from the request parameter.
 *
 * <ul>
 * <li><code>null</code> or empty: no transformation is configured.</li>
 * <li>A key of {@link #BUILTIN_TRANSFORMATIONS}: that built-in
 * transformation is used.</li>
 * <li>A value starting with <code>textgrid:</code> or <code>hdl:</code>: the
 * value is used as stylesheet URI, with target format
 * <code>text/xml</code>.</li>
 * <li>Anything else: no transformation is configured and a warning is
 * logged, so a mistyped parameter does not go unnoticed.</li>
 * </ul>
 *
 * @param transform
 *            the raw parameter value, may be <code>null</code>
 */
private void setupTransform(String transform) {
	if (transform == null || transform.isEmpty())
		return;

	final Transformation builtin = BUILTIN_TRANSFORMATIONS.get(transform);
	if (builtin != null) {
		this.transformation = Optional.of(builtin);
	} else if (transform.startsWith("textgrid:")
			|| transform.startsWith("hdl:")) {
		this.transformation = Optional.of(new Transformation("text/xml",
				transform));
	} else {
		logger.log(
				Level.WARNING,
				"Ignoring unknown transform parameter {0}: expected one of {1} or a textgrid: or hdl: stylesheet URI",
				new Object[] { transform,
						BUILTIN_TRANSFORMATIONS.keySet() });
	}
}
@Override
......@@ -83,7 +152,7 @@ public void write(final OutputStream output) throws IOException,
WebApplicationException {
final ZipOutputStream zip = new ZipOutputStream(output);
try {
ObjectType[] rootObjects = getRootObjects();
final ObjectType[] rootObjects = getRootObjects();
final String uriList = Joiner.on(", ")
.join(Iterators.transform(Iterators.forArray(rootObjects),
GetURI));
......@@ -110,30 +179,27 @@ public void write(final OutputStream output) throws IOException,
" Built aggregation tree for {1} after {0}",
stopwatch, GetURI.apply(rootMetadata)));
roots.add(entry);
addToMapping(mapping, entry);
}
for (final IAggregationEntry root : roots) {
if (root instanceof IAggregation)
writeAggregation(zip, (IAggregation) root);
else
writeFile(zip, root);
logger.log(Level.INFO, MessageFormat.format(
" Zipped {1} after {0}", stopwatch,
root.getTextGridURI()));
addToMapping(mapping, (AggregationEntry) entry);
}
// now serializing the mapping
// now serializing the mapping. Should be moved to the end since
// writing may change the rewrite config.
zip.putNextEntry(new ZipEntry(".INDEX.imex"));
JAXB.marshal(mapping.toImportSpec(), zip);
zip.closeEntry();
} catch (MetadataParseFault e) {
for (final ImportObject o : mapping) {
final ImportEntry importEntry = (ImportEntry) o;
writeFile(zip, importEntry);
}
} catch (final MetadataParseFault e) {
throw new WebApplicationException(e);
} catch (ObjectNotFoundFault e) {
} catch (final ObjectNotFoundFault e) {
throw new WebApplicationException(e, Status.NOT_FOUND);
} catch (IoFault e) {
} catch (final IoFault e) {
throw new WebApplicationException(e);
} catch (AuthFault e) {
} catch (final AuthFault e) {
throw new WebApplicationException(e, Status.FORBIDDEN);
} finally {
zip.close();
......@@ -164,6 +230,34 @@ private static Optional<String> getRewriteConfig(final String contentType) {
return Optional.fromNullable(REWRITE_CONFIGS.get(contentType));
}
/**
 * An {@link ImportObject} that remembers the tree entry it was created
 * from. On construction the TextGrid URI is copied from the tree entry and
 * the rewrite method is derived from the entry's format: XML rewriting with
 * the matching configuration if {@link ZipResult#getRewriteConfig(String)}
 * knows one for that format, no rewriting otherwise.
 */
private static class ImportEntry extends ImportObject {

	private final AggregationEntry treeEntry;

	/**
	 * @param treeEntry
	 *            the tree entry this import object describes
	 */
	public ImportEntry(AggregationEntry treeEntry) {
		super();
		this.treeEntry = treeEntry;
		setTextgridUri(treeEntry.getTextGridURI().toString());

		final Optional<String> configForFormat = ZipResult
				.getRewriteConfig(treeEntry.getFormat());
		if (!configForFormat.isPresent()) {
			// No rewrite configuration known for this format.
			setRewriteMethod(RewriteMethod.NONE);
			return;
		}
		setRewriteMethod(RewriteMethod.XML);
		setRewriteConfig(configForFormat.get());
	}

	/** @return the tree entry passed to the constructor */
	public AggregationEntry getTreeEntry() {
		return treeEntry;
	}

	/**
	 * @return the last-modified timestamp from the tree entry's generated
	 *         metadata, in milliseconds
	 */
	public long getLastModified() {
		return treeEntry.getMetadata().getGeneric().getGenerated()
				.getLastModified().toGregorianCalendar().getTimeInMillis();
	}
}
/**
* Recursively adds the entry to the mapping.
*
......@@ -171,79 +265,77 @@ private static Optional<String> getRewriteConfig(final String contentType) {
* @param entry
*/
private void addToMapping(final ImportMapping mapping,
final IAggregationEntry entry) {
final Optional<String> rewriteConfig = getRewriteConfig(((AggregationEntry) entry)
.getFormat());
final ImportObject importObject = new ImportObject();
importObject.setTextgridUri(entry.getTextGridURI().toString());
importObject.setLocalData(policy.getFilename(entry, false).toString());
importObject.setLocalMetadata(metaPolicy.getFilename(entry, false)
final AggregationEntry entry) {
final String format = ((AggregationEntry) entry).getFormat();
if (!onlySomeFormats || isAllowedFormat(format)) {
final ImportEntry importObject = new ImportEntry(
(AggregationEntry) entry);
importObject.setLocalData(policy.getFilename(entry).toString());
importObject.setLocalMetadata(metaPolicy.getFilename(entry)
.toString());
if (rewriteConfig.isPresent()) {
importObject.setRewriteMethod(RewriteMethod.XML);
importObject.setRewriteConfig(rewriteConfig.get());
} else {
importObject.setRewriteMethod(RewriteMethod.NONE);
}
mapping.add(importObject);
}
if (entry instanceof IAggregation) {
final IAggregation aggregation = (IAggregation) entry;
for (final IAggregationEntry child : aggregation.getChildren()) {
addToMapping(mapping, child);
addToMapping(mapping, (AggregationEntry) child);
}
}
}
private void writeAggregation(final ZipOutputStream zip,
final IAggregation root) throws IOException {
final URI uri = root.getTextGridURI();
if (written.contains(uri)) {
logger.log(Level.WARNING, "Skipping duplicate aggregation {0}", uri);
return;
}
writeFile(zip, root);
final ZipEntry zipEntry = new ZipEntry(policy.getFilename(root, true)
.toString());
zip.putNextEntry(zipEntry);
zipEntry.setTime(root.getMetadata().getGeneric().getGenerated()
.getLastModified().toGregorianCalendar().getTimeInMillis());
zip.closeEntry();
for (final IAggregationEntry child : root.getChildren()) {
if (child instanceof IAggregation) {
writeAggregation(zip, (IAggregation) child);
} else {
writeFile(zip, child);
}
/**
 * Checks whether objects of the given format are to be exported.
 *
 * @param format
 *            the object's format as a media type string, may be
 *            <code>null</code>
 * @return <code>true</code> if the export is not restricted to certain
 *         formats, or if the format falls within one of the configured
 *         media types. A <code>null</code> or unparsable format never
 *         matches a restriction and yields <code>false</code> instead of
 *         aborting the export with an exception.
 */
private boolean isAllowedFormat(final String format) {
	if (!onlySomeFormats)
		return true;
	if (format == null)
		return false; // MediaType.parse(null) would throw

	final MediaType thisMediaType;
	try {
		thisMediaType = MediaType.parse(format);
	} catch (final IllegalArgumentException e) {
		logger.log(
				Level.WARNING,
				"Cannot parse format {0}; excluding the object from the restricted export",
				format);
		return false;
	}

	for (final MediaType allowed : onlyFormats) {
		if (thisMediaType.is(allowed))
			return true;
	}
	return false;
}
private void writeFile(final ZipOutputStream zip,
final IAggregationEntry child) throws IOException {
URI uri = child.getTextGridURI();
if (written.contains(uri)) {
logger.log(Level.WARNING, "Skipping duplicate object {0}", uri);
return;
}
written.add(uri);
writeMetadata(zip, child);
final ZipEntry zipEntry = new ZipEntry(policy.getFilename(child, false)
.toString());
zipEntry.setTime(child.getMetadata().getGeneric().getGenerated()
.getLastModified().toGregorianCalendar().getTimeInMillis());
final ImportEntry importEntry) throws IOException {
writeMetadata(zip, importEntry);
writeMissingDirectories(zip, importEntry.getLocalData(), importEntry.getLastModified());
final ZipEntry zipEntry = new ZipEntry(importEntry.getLocalData());
zipEntry.setTime(importEntry.getTreeEntry().getMetadata().getGeneric()
.getGenerated().getLastModified().toGregorianCalendar()
.getTimeInMillis());
final ImportObject importObject = mapping
.getImportObjectForTextGridURI(child.getTextGridURI()
.toString());
try {
final InputStream content = repository.getContent(
child.getTextGridURI(), getSid().orNull());
if (importObject.getRewriteMethod().equals(RewriteMethod.XML)) {
final InputStream content = repository.getContent(importEntry
.getTreeEntry().getTextGridURI(), getSid().orNull());
// Content options:
// (1) Transformation, e.g. to text. Only for xml and if configured.
if (transformation.isPresent()
&& ("text/xml".equals(importEntry.getTreeEntry()
.getFormat()))) {
zipEntry.setComment("Transformed from text/xml");
zip.putNextEntry(zipEntry);
XsltTransformer transformer = stylesheetManager
.getStylesheet(transformation.get().stylesheet,
getSid(), false, false).load();
StreamSource source = new StreamSource(content, importEntry
.getTreeEntry().getTextGridURI().toString());
Serializer serializer = stylesheetManager.xsltProcessor
.newSerializer(zip);
transformer.setSource(source);
transformer.setDestination(serializer);
transformer.transform();
// (2) Link rewriting, using XML. Only if rewrite method
// configured.
} else if (importEntry.getRewriteMethod().equals(RewriteMethod.XML)) {
final ConfigurableXMLRewriter rewriter = new ConfigurableXMLRewriter(
mapping, true);
rewriter.configure(URI.create(importObject.getRewriteConfig()));
final Optional<URI> base = policy.getBase(child);
rewriter.configure(URI.create(importEntry.getRewriteConfig()));
final Optional<URI> base = policy.getBase(importEntry
.getTreeEntry());
if (base.isPresent()) {
rewriter.setBase(base.get());
}
......@@ -254,16 +346,18 @@ private void writeFile(final ZipOutputStream zip,
zip.putNextEntry(zipEntry);
ByteStreams.copy(buffer.getSupplier(), zip);
} catch (final XMLStreamException e) {
// (2a) fallback if rewriting failed
final String errorMsg = MessageFormat
.format("Failed to rewrite {0} (error: {1}). Exported with verbatim links instead.",
child, e.getMessage());
importEntry, e.getMessage());
logger.log(Level.WARNING, errorMsg, e);
zipEntry.setComment(errorMsg);
importObject.setRewriteMethod(RewriteMethod.NONE);
importEntry.setRewriteMethod(RewriteMethod.NONE);
zip.putNextEntry(zipEntry);
ByteStreams.copy(buffer.getSupplier(), zip);
}
} else {
// (3) plain copy. For all other cases.
zip.putNextEntry(zipEntry);
ByteStreams.copy(content, zip);
}
......@@ -275,31 +369,74 @@ private void writeFile(final ZipOutputStream zip,
}
private void writeMetadata(final ZipOutputStream zip,
final IAggregationEntry child) throws IOException {
final ZipEntry zipEntry = new ZipEntry(metaPolicy.getFilename(child,
false).toString());
zipEntry.setTime(child.getMetadata().getGeneric().getGenerated()
.getLastModified().toGregorianCalendar().getTimeInMillis());
final ImportEntry importEntry) throws IOException {
if (!includeMeta)
return;
writeMissingDirectories(zip, importEntry.getLocalMetadata(), importEntry.getLastModified());
final ZipEntry zipEntry = new ZipEntry(importEntry.getLocalMetadata());
zipEntry.setTime(importEntry.getLastModified());
zip.putNextEntry(zipEntry);
final ConfigurableXMLRewriter rewriter = new ConfigurableXMLRewriter(
mapping, true);
rewriter.configure(URI.create("internal:textgrid#metadata"));
final Optional<URI> base = metaPolicy.getBase(child);
final Optional<URI> base = metaPolicy.getBase(importEntry
.getTreeEntry());
if (base.isPresent()) {
rewriter.setBase(base.get());
}
final FileBackedOutputStream buffer = new FileBackedOutputStream(
1024 * 1024);
JAXB.marshal(child.getMetadata(), buffer);
JAXB.marshal(importEntry.getTreeEntry().getMetadata(), buffer);
try {
rewriter.rewrite(buffer.getSupplier().getInput(), zip);
} catch (final XMLStreamException e) {
logger.log(Level.SEVERE, MessageFormat.format(
"Error rewriting the metadata of {0}. Should not happen.",
child), e);
importEntry), e);
}
zip.closeEntry();
}
private final Set<String> writtenDirectoryNames = Sets.newHashSet();
/**
* This writes 'parent directory' entries to the zip stream for all
* directory names required for the given filename.
*
* @param zip
* The zip stream.
* @param filename
* The filename for which to create directory entries
* @param lastModified
* The modification date to use for the directory entries created
* in this step.