Commit 10fb28ce authored by thorsten.vitt's avatar thorsten.vitt
Browse files

Merge branch 'feature/zip-config' into develop

parents e6733a14 5a1f7b6c
...@@ -30,7 +30,11 @@ ...@@ -30,7 +30,11 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes> <attributes>
<attribute name="maven.pomderived" value="true"/> <attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.dependency" value="/WEB-INF/lib"/> </attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/webapp/WEB-INF">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes> </attributes>
</classpathentry> </classpathentry>
<classpathentry kind="output" path="target/classes"/> <classpathentry kind="output" path="target/classes"/>
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
<properties> <properties>
<commons-io-version>2.4</commons-io-version> <commons-io-version>2.4</commons-io-version>
<link-rewriter-version>0.3.1-SNAPSHOT</link-rewriter-version> <link-rewriter-version>0.3.4-SNAPSHOT</link-rewriter-version>
<saxon-version>9.4.0.7</saxon-version> <saxon-version>9.4.0.7</saxon-version>
<cxf-version>2.7.7</cxf-version> <cxf-version>2.7.7</cxf-version>
<confclient-version>1.0-SNAPSHOT</confclient-version> <confclient-version>1.0-SNAPSHOT</confclient-version>
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.List;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import javax.servlet.ServletContext; import javax.servlet.ServletContext;
...@@ -137,9 +138,35 @@ public Response getZIP( ...@@ -137,9 +138,35 @@ public Response getZIP(
@QueryParam("sid") final String sid, @QueryParam("sid") final String sid,
@Description("(optional) title for the exported data, currently only used for generating the filename. If none is given, the first title of the first object will be used.") @Description("(optional) title for the exported data, currently only used for generating the filename. If none is given, the first title of the first object will be used.")
@QueryParam("title") final String title, @QueryParam("title") final String title,
@QueryParam("filenames")
@DefaultValue("{parent|/}{author}-{title}*.{ext}")
@Description("Pattern for the generated filenames in the ZIP files.") final
String filenames,
@QueryParam("metanames")
@DefaultValue("{filename}.meta")
@Description("Pattern for the filenames for the metadata files in the ZIP files.") final
String metanames,
@QueryParam("dirnames")
@DefaultValue("{parent|/}{title}*")
@Description("Pattern for the directory names generated for aggregations etc. This pattern applied to the parent aggregation is available as {parent} in filenames and metanames.") final
String dirnames,
@QueryParam("only")
@Description("Restrict export to objects with the given MIME types")
final List<String> only,
@QueryParam("meta")
@Description("Export metadata and aggregation files")
@DefaultValue("true")
final boolean meta,
@QueryParam("transform")
@Description("Transform XML documents")
final String transform,
@Context final Request request) throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException { @Context final Request request) throws MetadataParseFault, ObjectNotFoundFault, IoFault, AuthFault, ProtocolNotImplementedFault, IOException, SaxonApiException {
final ZipResult zipResult = new ZipResult(repository, request, uriList); final ZipResult zipResult = new ZipResult(repository, getStylesheetManager(), request, uriList, filenames, metanames, dirnames, only, meta, transform);
if (title != null) if (title != null)
zipResult.setTitle(title); zipResult.setTitle(title);
if (sid != null) if (sid != null)
......
package info.textgrid.services.aggregator.tree; package info.textgrid.services.aggregator.tree;
import info.textgrid.namespaces.metadata.core._2010.GeneratedType.Pid;
import info.textgrid.namespaces.metadata.core._2010.GeneratedType.TextgridUri;
import info.textgrid.namespaces.metadata.core._2010.ObjectType; import info.textgrid.namespaces.metadata.core._2010.ObjectType;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault; import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault; import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
...@@ -10,9 +12,14 @@ ...@@ -10,9 +12,14 @@
import info.textgrid.utils.export.aggregations.AggregationEntry; import info.textgrid.utils.export.aggregations.AggregationEntry;
import java.util.Deque; import java.util.Deque;
import java.util.List;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.ibm.icu.text.MessageFormat; import com.ibm.icu.text.MessageFormat;
...@@ -30,13 +37,49 @@ protected AggregationTreeFactory(final ObjectType root, final ITextGridRep repos ...@@ -30,13 +37,49 @@ protected AggregationTreeFactory(final ObjectType root, final ITextGridRep repos
// stack.push(this.root); // stack.push(this.root);
walkAggregation(root, false); walkAggregation(root, false);
} }
/**
 * Pattern for internal links: group 1 is the scheme (<code>textgrid</code>
 * or <code>hdl</code>), group 2 the identifier up to the first dot, group 3
 * the optional <code>.digits</code> suffix (group 4: the digits alone).
 */
private static final Pattern INTERNAL_LINK = Pattern.compile("^(textgrid|hdl):([^.]*)(\\.(\\d+))?$");

/**
 * Returns <code>true</code> iff link is a link that probably refers to target.
 *
 * <ul>
 * <li><code>textgrid:</code> links match if they equal the target's
 * TextGrid URI, or, when the link carries no <code>.digits</code> suffix
 * (generic URI), if the identifier part equals that of the target's URI.</li>
 * <li><code>hdl:</code> links match if they equal one of the target's PIDs.</li>
 * </ul>
 *
 * @param link
 *            the link to check; <code>null</code> or a link that does not
 *            match {@link #INTERNAL_LINK} never refers to anything
 * @param target
 *            the object the link might refer to
 * @return <code>true</code> if the link is considered to point to target
 */
static boolean refersTo(final String link, final ObjectType target) {
    // Callers pass optional metadata values straight through, so a missing
    // link must not blow up in Pattern.matcher().
    if (link == null) {
        return false;
    }

    final Matcher matcher = INTERNAL_LINK.matcher(link);
    if (!matcher.matches()) {
        // No apostrophe in the message: java.util.logging formats messages
        // containing {0} via MessageFormat, which treats ' as a quote char.
        logger.log(Level.WARNING, "Internal link {0} does not match the pattern for internal links", link);
        return false;
    }

    final String scheme = matcher.group(1);
    if ("textgrid".equals(scheme)) {
        final String targetURI = target.getGeneric().getGenerated().getTextgridUri().getValue();
        if (link.equals(targetURI)) {
            return true;
        }
        if (matcher.group(3) == null && targetURI != null) { // generic URI
            final Matcher targetMatcher = INTERNAL_LINK.matcher(targetURI);
            return targetMatcher.matches() && targetMatcher.group(2).equals(matcher.group(2));
        }
    } else if ("hdl".equals(scheme)) {
        final List<Pid> pids = target.getGeneric().getGenerated().getPid();
        return Iterables.any(pids, new Predicate<Pid>() {
            @Override
            public boolean apply(final Pid input) {
                return link.equals(input.getValue());
            }
        });
    }
    return false;
}
@Override @Override
protected boolean walk(final ObjectType object, final boolean again) { protected boolean walk(final ObjectType object, final boolean again) {
if (super.walk(object, again)) if (super.walk(object, again))
return true; // Aggregation already handled via #walkAggregation return true; // Aggregation already handled via #walkAggregation
else { else {
new AggregationEntry(object, stack.peek()); Aggregation parent = stack.peek();
AggregationEntry entry = new AggregationEntry(object, parent);
if (parent.getMetadata().getEdition() != null &&
refersTo(parent.getMetadata().getEdition().getIsEditionOf(), object))
parent.setWork(entry);
return true; return true;
} }
} }
...@@ -55,7 +98,7 @@ protected void walkAggregation(final ObjectType aggregation, ...@@ -55,7 +98,7 @@ protected void walkAggregation(final ObjectType aggregation,
final ObjectType workObject = getRepository() final ObjectType workObject = getRepository()
.getCRUDService() .getCRUDService()
.readMetadata(getSid(), "", workURI).getObject(); .readMetadata(getSid(), "", workURI).getObject();
walkAggregation(workObject, again); walk(workObject, again);
} catch (final MetadataParseFault e) { } catch (final MetadataParseFault e) {
logger.log( logger.log(
Level.WARNING, Level.WARNING,
......
...@@ -10,11 +10,11 @@ ...@@ -10,11 +10,11 @@
import info.textgrid.services.aggregator.AbstractExporter; import info.textgrid.services.aggregator.AbstractExporter;
import info.textgrid.services.aggregator.ITextGridRep; import info.textgrid.services.aggregator.ITextGridRep;
import info.textgrid.services.aggregator.tree.AggregationTreeFactory; import info.textgrid.services.aggregator.tree.AggregationTreeFactory;
import info.textgrid.services.aggregator.util.StylesheetManager;
import info.textgrid.utils.export.aggregations.AggregationEntry; import info.textgrid.utils.export.aggregations.AggregationEntry;
import info.textgrid.utils.export.aggregations.IAggregation; import info.textgrid.utils.export.aggregations.IAggregation;
import info.textgrid.utils.export.aggregations.IAggregationEntry; import info.textgrid.utils.export.aggregations.IAggregationEntry;
import info.textgrid.utils.export.filenames.DefaultFilenamePolicy; import info.textgrid.utils.export.filenames.ConfigurableFilenamePolicy;
import info.textgrid.utils.export.filenames.DefaultMetaFilenamePolicy;
import info.textgrid.utils.export.filenames.IFilenamePolicy; import info.textgrid.utils.export.filenames.IFilenamePolicy;
import info.textgrid.utils.linkrewriter.ConfigurableXMLRewriter; import info.textgrid.utils.linkrewriter.ConfigurableXMLRewriter;
import info.textgrid.utils.linkrewriter.ImportMapping; import info.textgrid.utils.linkrewriter.ImportMapping;
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
import java.io.OutputStream; import java.io.OutputStream;
import java.net.URI; import java.net.URI;
import java.text.MessageFormat; import java.text.MessageFormat;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.logging.Level; import java.util.logging.Level;
...@@ -37,6 +38,11 @@ ...@@ -37,6 +38,11 @@
import javax.ws.rs.core.StreamingOutput; import javax.ws.rs.core.StreamingOutput;
import javax.xml.bind.JAXB; import javax.xml.bind.JAXB;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XsltExecutable;
import net.sf.saxon.s9api.XsltTransformer;
import com.google.common.base.Function; import com.google.common.base.Function;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
...@@ -48,16 +54,22 @@ ...@@ -48,16 +54,22 @@
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams; import com.google.common.io.ByteStreams;
import com.google.common.io.FileBackedOutputStream; import com.google.common.io.FileBackedOutputStream;
import com.google.common.net.MediaType;
public class ZipResult extends AbstractExporter implements StreamingOutput { public class ZipResult extends AbstractExporter implements StreamingOutput {
private final IFilenamePolicy policy; private final ConfigurableFilenamePolicy policy;
private final IFilenamePolicy metaPolicy; private final IFilenamePolicy metaPolicy;
private ImportMapping mapping; private ImportMapping mapping;
private final static Logger logger = Logger.getLogger(ZipResult.class private final static Logger logger = Logger.getLogger(ZipResult.class
.getCanonicalName()); .getCanonicalName());
private final Set<URI> written = Sets.newHashSet(); private final Set<URI> written = Sets.newHashSet();
private boolean onlySomeFormats;
private List<MediaType> onlyFormats;
private boolean includeMeta;
private StylesheetManager stylesheetManager;
private Optional<Transformation> transformation = Optional.absent();
private static final Function<ObjectType, String> GetURI = new Function<ObjectType, String>() { private static final Function<ObjectType, String> GetURI = new Function<ObjectType, String>() {
...@@ -69,13 +81,70 @@ public String apply(final ObjectType input) { ...@@ -69,13 +81,70 @@ public String apply(final ObjectType input) {
}; };
public ZipResult(final ITextGridRep repository, final Request request, public ZipResult(final ITextGridRep repository,
final String uriList) { StylesheetManager stylesheetManager, final Request request,
final String uriList, final String filenames,
final String metanames, final String dirnames,
final List<String> only, final boolean includeMeta, String transform) {
super(repository, request, uriList); super(repository, request, uriList);
this.policy = new DefaultFilenamePolicy(); final ConfigurableFilenamePolicy parentPolicy = ConfigurableFilenamePolicy
this.metaPolicy = new DefaultMetaFilenamePolicy(policy); .builder(dirnames).isParent().build();
this.policy = ConfigurableFilenamePolicy.builder(filenames)
.subPolicy("parent", parentPolicy).build();
this.metaPolicy = ConfigurableFilenamePolicy.builder(metanames)
.subPolicy("parent", parentPolicy)
.subPolicy("filename", policy).build();
// Restrict the export to the given MIME types, if any were requested.
// The braces are essential: without them only the flag assignment was
// guarded and Lists.transform() was handed a null list whenever no
// "only" parameter had been passed.
if (only != null && !only.isEmpty()) {
    this.onlySomeFormats = true;
    // Lazy view: each entry is parsed when the list is read.
    this.onlyFormats = Lists.transform(only,
            new Function<String, MediaType>() {
                @Override
                public MediaType apply(final String input) {
                    return MediaType.parse(input);
                }
            });
} else {
    // No filter requested: keep the field non-null, as it was before
    // for an empty parameter list.
    this.onlyFormats = java.util.Collections.<MediaType> emptyList();
}
setFileExtension("zip"); setFileExtension("zip");
setMediaType("application/zip"); setMediaType("application/zip");
this.includeMeta = includeMeta;
this.stylesheetManager = stylesheetManager;
setupTransform(transform);
}
/**
 * Immutable description of a transformation: the stylesheet to use and the
 * format label associated with its output.
 */
private static class Transformation {

    /** Stylesheet location, parsed from the string given to the constructor. */
    public final URI stylesheet;

    /** Format label for the result, e.g. <code>text/plain</code>. */
    public final String targetFormat;

    /**
     * @param targetFormat
     *            format label for the result
     * @param stylesheet
     *            stylesheet location; must be a syntactically valid URI
     *            because it is passed to {@link URI#create(String)}
     */
    public Transformation(final String targetFormat, final String stylesheet) {
        final URI stylesheetLocation = URI.create(stylesheet);
        this.targetFormat = targetFormat;
        this.stylesheet = stylesheetLocation;
    }
}
/**
 * Transformations that can be requested by a short name instead of a
 * stylesheet URI. Keys are the accepted names, values give the target
 * format and the stylesheet path for each of them.
 * <p>
 * Declared <code>final</code>: the map itself is immutable, so the
 * reference to this shared lookup table should not be reassignable either.
 */
private static final ImmutableMap<String, Transformation> BUILTIN_TRANSFORMATIONS = ImmutableMap
        .<String, Transformation> builder()
        .put("text",
                new Transformation("text/plain",
                        "/WEB-INF/tei-stylesheets/txt/tei-to-text.xsl"))
        .put("html",
                new Transformation("text/html",
                        "/WEB-INF/stylesheets/db2xhtml.xsl")).build();
private void setupTransform(String transform) {
if (transform == null || transform.isEmpty())
return;
if (BUILTIN_TRANSFORMATIONS.containsKey(transform))
this.transformation = Optional.of(BUILTIN_TRANSFORMATIONS
.get(transform));
else if (transform.startsWith("textgrid:")
|| transform.startsWith("hdl:"))
this.transformation = Optional.of(new Transformation("text/xml",
transform));
} }
@Override @Override
...@@ -83,7 +152,7 @@ public void write(final OutputStream output) throws IOException, ...@@ -83,7 +152,7 @@ public void write(final OutputStream output) throws IOException,
WebApplicationException { WebApplicationException {
final ZipOutputStream zip = new ZipOutputStream(output); final ZipOutputStream zip = new ZipOutputStream(output);
try { try {
ObjectType[] rootObjects = getRootObjects(); final ObjectType[] rootObjects = getRootObjects();
final String uriList = Joiner.on(", ") final String uriList = Joiner.on(", ")
.join(Iterators.transform(Iterators.forArray(rootObjects), .join(Iterators.transform(Iterators.forArray(rootObjects),
GetURI)); GetURI));
...@@ -110,30 +179,27 @@ public void write(final OutputStream output) throws IOException, ...@@ -110,30 +179,27 @@ public void write(final OutputStream output) throws IOException,
" Built aggregation tree for {1} after {0}", " Built aggregation tree for {1} after {0}",
stopwatch, GetURI.apply(rootMetadata))); stopwatch, GetURI.apply(rootMetadata)));
roots.add(entry); roots.add(entry);
addToMapping(mapping, entry); addToMapping(mapping, (AggregationEntry) entry);
} }
for (final IAggregationEntry root : roots) { // now serializing the mapping. Should be moved to the end since
if (root instanceof IAggregation) // writing may change the rewrite config.
writeAggregation(zip, (IAggregation) root);
else
writeFile(zip, root);
logger.log(Level.INFO, MessageFormat.format(
" Zipped {1} after {0}", stopwatch,
root.getTextGridURI()));
}
// now serializing the mapping
zip.putNextEntry(new ZipEntry(".INDEX.imex")); zip.putNextEntry(new ZipEntry(".INDEX.imex"));
JAXB.marshal(mapping.toImportSpec(), zip); JAXB.marshal(mapping.toImportSpec(), zip);
zip.closeEntry(); zip.closeEntry();
} catch (MetadataParseFault e) {
for (final ImportObject o : mapping) {
final ImportEntry importEntry = (ImportEntry) o;
writeFile(zip, importEntry);
}
} catch (final MetadataParseFault e) {
throw new WebApplicationException(e); throw new WebApplicationException(e);
} catch (ObjectNotFoundFault e) { } catch (final ObjectNotFoundFault e) {
throw new WebApplicationException(e, Status.NOT_FOUND); throw new WebApplicationException(e, Status.NOT_FOUND);
} catch (IoFault e) { } catch (final IoFault e) {
throw new WebApplicationException(e); throw new WebApplicationException(e);
} catch (AuthFault e) { } catch (final AuthFault e) {
throw new WebApplicationException(e, Status.FORBIDDEN); throw new WebApplicationException(e, Status.FORBIDDEN);
} finally { } finally {
zip.close(); zip.close();
...@@ -164,6 +230,34 @@ private static Optional<String> getRewriteConfig(final String contentType) { ...@@ -164,6 +230,34 @@ private static Optional<String> getRewriteConfig(final String contentType) {
return Optional.fromNullable(REWRITE_CONFIGS.get(contentType)); return Optional.fromNullable(REWRITE_CONFIGS.get(contentType));
} }
/**
 * An {@link ImportObject} that keeps a reference to the aggregation tree
 * entry it was created from.
 */
private static class ImportEntry extends ImportObject {

    private final AggregationEntry treeEntry;

    /**
     * Creates the import object for the given tree entry: takes over the
     * entry's TextGrid URI and picks the rewrite method depending on whether
     * a rewrite configuration is known for the entry's format.
     *
     * @param treeEntry
     *            the aggregation tree entry this import object represents
     */
    public ImportEntry(final AggregationEntry treeEntry) {
        super();
        this.treeEntry = treeEntry;
        setTextgridUri(treeEntry.getTextGridURI().toString());

        final Optional<String> configForFormat = ZipResult
                .getRewriteConfig(treeEntry.getFormat());
        if (!configForFormat.isPresent()) {
            // no rewrite configuration for this format
            setRewriteMethod(RewriteMethod.NONE);
            return;
        }
        setRewriteMethod(RewriteMethod.XML);
        setRewriteConfig(configForFormat.get());
    }

    /**
     * @return the aggregation tree entry this import object was created from
     */
    public AggregationEntry getTreeEntry() {
        return this.treeEntry;
    }

    /**
     * @return the last-modified timestamp from the entry's generated
     *         metadata, in milliseconds
     */
    public long getLastModified() {
        final ObjectType metadata = this.treeEntry.getMetadata();
        return metadata.getGeneric().getGenerated().getLastModified()
                .toGregorianCalendar().getTimeInMillis();
    }
}
/** /**
* Recursively adds the entry to the mapping. * Recursively adds the entry to the mapping.
* *
...@@ -171,79 +265,77 @@ private static Optional<String> getRewriteConfig(final String contentType) { ...@@ -171,79 +265,77 @@ private static Optional<String> getRewriteConfig(final String contentType) {
* @param entry * @param entry
*/ */
private void addToMapping(final ImportMapping mapping, private void addToMapping(final ImportMapping mapping,
final IAggregationEntry entry) { final AggregationEntry entry) {
final Optional<String> rewriteConfig = getRewriteConfig(((AggregationEntry) entry) final String format = ((AggregationEntry) entry).getFormat();
.getFormat()); if (!onlySomeFormats || isAllowedFormat(format)) {
final ImportObject importObject = new ImportObject(); final ImportEntry importObject = new ImportEntry(
importObject.setTextgridUri(entry.getTextGridURI().toString()); (AggregationEntry) entry);
importObject.setLocalData(policy.getFilename(entry, false).toString()); importObject.setLocalData(policy.getFilename(entry).toString());
importObject.setLocalMetadata(metaPolicy.getFilename(entry, false) importObject.setLocalMetadata(metaPolicy.getFilename(entry)
.toString()); .toString());
if (rewriteConfig.isPresent()) { mapping.add(importObject);
importObject.setRewriteMethod(RewriteMethod.XML);
importObject.setRewriteConfig(rewriteConfig.get());
} else {
importObject.setRewriteMethod(RewriteMethod.NONE);
} }
mapping.add(importObject);
if (entry instanceof IAggregation) { if (entry instanceof IAggregation) {
final IAggregation aggregation = (IAggregation) entry; final IAggregation aggregation = (IAggregation) entry;
for (final IAggregationEntry child : aggregation.getChildren()) { for (final IAggregationEntry child : aggregation.getChildren()) {
addToMapping(mapping, child); addToMapping(mapping, (AggregationEntry) child);
} }
} }
} }
private void writeAggregation(final ZipOutputStream zip, private boolean isAllowedFormat(final String format) {
final IAggregation root) throws IOException { if (!onlySomeFormats)
final URI uri = root.getTextGridURI(); return true;
if (written.contains(uri)) { final MediaType thisMediaType = MediaType.parse(format);
logger.log(Level.WARNING, "Skipping duplicate aggregation {0}", uri); for (final MediaType allowed : onlyFormats) {
return; if (thisMediaType.is(allowed))
} return true;
writeFile(zip, root);
final ZipEntry zipEntry = new ZipEntry(policy.getFilename(root, true)
.toString());
zip.putNextEntry(zipEntry);
zipEntry.setTime(root.getMetadata().getGeneric().getGenerated()
.getLastModified().toGregorianCalendar().getTimeInMillis());
zip.closeEntry();
for (final IAggregationEntry child : root.getChildren()) {
if (child instanceof IAggregation) {
writeAggregation(zip, (IAggregation) child);
} else {
writeFile(zip, child);
}
} }
return false;
} }
private void writeFile(final ZipOutputStream zip, private void writeFile(final ZipOutputStream zip,
final IAggregationEntry child) throws IOException { final ImportEntry importEntry) throws IOException {
URI uri = child.getTextGridURI();
if (written.contains(uri)) { writeMetadata(zip, importEntry);
logger.log(Level.WARNING, "Skipping duplicate object {0}", uri); writeMissingDirectories(zip, importEntry.getLocalData(), importEntry.getLastModified());
return;
} final ZipEntry zipEntry = new ZipEntry(importEntry.getLocalData());
written.add(uri); zipEntry.setTime(importEntry.getTreeEntry().getMetadata().getGeneric()
writeMetadata(zip, child); .getGenerated().getLastModified().toGregorianCalendar()
final ZipEntry zipEntry = new ZipEntry(policy.getFilename(child, false) .getTimeInMillis());
.toString());
zipEntry.setTime(child.getMetadata().getGeneric().getGenerated()
.getLastModified().toGregorianCalendar().getTimeInMillis());
final ImportObject importObject = mapping
.getImportObjectForTextGridURI(child.getTextGridURI()
.toString());
try { try {
final InputStream content = repository.getContent( final InputStream content = repository.getContent(importEntry
child.getTextGridURI(), getSid().orNull()); .getTreeEntry().getTextGridURI(), getSid().orNull());