Skip to content
Snippets Groups Projects
Commit e09052b0 authored by Thorsten Vitt
Browse files

Rewriting for EPUB

parent 4c715c75
No related branches found
No related tags found
No related merge requests found
......@@ -101,7 +101,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>12.0</version>
<version>13.0.1</version>
</dependency>
<dependency>
<groupId>info.textgrid.middleware</groupId>
......
......@@ -6,16 +6,21 @@
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.MetadataParseFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ObjectNotFoundFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ProtocolNotImplementedFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.TGCrudService;
import info.textgrid.utils.linkrewriter.ConfigurableXMLRewriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.MessageFormat;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Map.Entry;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
......@@ -27,6 +32,7 @@
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import javax.xml.stream.XMLStreamException;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.Processor;
......@@ -35,6 +41,9 @@
import net.sf.saxon.s9api.XsltExecutable;
import net.sf.saxon.s9api.XsltTransformer;
import org.xml.sax.SAXException;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
@Path("/epub")
......@@ -51,38 +60,40 @@ public EPUB() {
xsltProcessor = new Processor(false);
final XsltCompiler xsltCompiler = xsltProcessor.newXsltCompiler();
try {
teiToEpub = xsltCompiler.compile(new StreamSource("/usr/share/xml/tei/stylesheet/epub/tei-to-epub.xsl"));
teiToEpub = xsltCompiler.compile(new StreamSource(
"/usr/share/xml/tei/stylesheet/epub/tei-to-epub.xsl"));
} catch (final SaxonApiException e) {
throw new IllegalStateException(e);
}
}
@GET
@Path(value="/{object}")
@Produces(value="application/epub+zip")
@Path(value = "/{object}")
@Produces(value = "application/epub+zip")
public Response get(@PathParam("object") final URI uri) {
logger.fine("EPUB called for root object: " + uri);
final TGCrudService crud = repository.getCRUDService();
try {
final MetadataContainerType container = crud.readMetadata(null, null, uri.toString());
final MetadataContainerType container = crud.readMetadata(null,
null, uri.toString());
final ObjectType rootObject = container.getObject();
final String mimeType = rootObject.getGeneric().getProvided().getFormat();
final String mimeType = rootObject.getGeneric().getProvided()
.getFormat();
final boolean aggregation = mimeType.contains("aggregation");
if (!aggregation && !mimeType.matches("^text/.*xml.*")) {
final String errorMsg = "The EPUB export can only convert aggregations or XML documents to EPUB, however, the document {0} you referred to has the MIME type {1}.";
logger.warning("Failing with: " + errorMsg);
return Response
.status(Status.UNSUPPORTED_MEDIA_TYPE)
return Response.status(Status.UNSUPPORTED_MEDIA_TYPE)
.entity(MessageFormat.format(errorMsg, uri, mimeType))
.type("text/plain")
.build();
.type("text/plain").build();
}
final File workingDir = Files.createTempDir();
logger.fine("Using " + workingDir + " to build the E-Book");
// First, use the aggregator to create a TEI corpus file to build on
final TEICorpusSerializer corpusSerializer = new TEICorpusSerializer(rootObject, true);
final TEICorpusSerializer corpusSerializer = new TEICorpusSerializer(
rootObject, true);
final File corpus = new File(workingDir, "corpus.xml");
final FileOutputStream corpusOutput = new FileOutputStream(corpus);
corpusSerializer.write(corpusOutput);
......@@ -91,22 +102,36 @@ public Response get(@PathParam("object") final URI uri) {
// Now, run the EPUB stylesheet
// TODO cache the saxon stuff
final XsltTransformer transformer = teiToEpub.load();
transformer.setDestination(xsltProcessor.newSerializer(new File(workingDir, "output.xml")));
transformer.setDestination(xsltProcessor.newSerializer(new File(
workingDir, "output.xml")));
transformer.setSource(new StreamSource(corpus));
transformer.transform();
// Finally, create the ZIP file.
// FIXME first as a file
final File zipFile = new File(workingDir, "ebook.epub");
final ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
final ZipOutputStream zip = new ZipOutputStream(
new FileOutputStream(zipFile));
try {
// first entry is the uncompressed mimetype marker
new MimeTypeEntry("application/epub+zip").writeTo(zip);
// now filtered filesystem structure. FIXME add rewriting + images
// now filtered filesystem structure. FIXME add rewriting +
// images
final File mimeTypeFile = new File(workingDir, "mimetype");
final URI base = workingDir.toURI();
final File opsDir = new File(workingDir, "OPS");
final URI ops = opsDir.toURI();
final OPFManifest manifest = new OPFManifest(new File(opsDir,
"content.opf"));
final ConfigurableXMLRewriter xhtmlRewriter = new ConfigurableXMLRewriter(
manifest.getImportMapping(), true);
xhtmlRewriter.configure(URI.create("internal:html#html"));
final ConfigurableXMLRewriter opfRewriter = new ConfigurableXMLRewriter(
manifest.getImportMapping(), true);
opfRewriter.configure(URI.create("internal:epub#opf"));
final Deque<File> queue = new LinkedList<File>();
queue.push(workingDir);
while (!queue.isEmpty()) {
......@@ -114,7 +139,8 @@ public Response get(@PathParam("object") final URI uri) {
for (final File child : directory.listFiles()) {
// filter stuff we don't want:
if (child.equals(mimeTypeFile) || child.equals(corpus) || child.equals(zipFile))
if (child.equals(mimeTypeFile) || child.equals(corpus)
|| child.equals(zipFile))
continue;
String name = base.relativize(child.toURI()).getPath();
......@@ -125,20 +151,52 @@ public Response get(@PathParam("object") final URI uri) {
zip.putNextEntry(new ZipEntry(name));
} else {
zip.putNextEntry(new ZipEntry(name));
Files.copy(child, zip);
if (Files.getFileExtension(name).equals("html"))
xhtmlRewriter.rewrite(
new FileInputStream(child), zip);
else if (Files.getFileExtension(name).equals("opf"))
opfRewriter.rewrite(new FileInputStream(child),
zip);
else
Files.copy(child, zip);
zip.closeEntry();
}
}
}
// now we need to add those files that are referenced by
// absolute URI in the manifest
for (final Entry<URI, String> externalItem : manifest.externalItems.entrySet()) {
final String pseudoFileName = base.relativize(new File(opsDir, manifest.getFileName(externalItem.getKey().toString())).toURI()).getPath();
zip.putNextEntry(new ZipEntry(pseudoFileName));
ByteStreams.copy(repository.getContent(externalItem.getKey()), zip);
zip.closeEntry();
}
} catch (final SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (final URISyntaxException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (final XMLStreamException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (final ProtocolNotImplementedFault e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
zip.close();
}
return RESTUtils.attachmentResponse(rootObject.getGeneric().getProvided().getTitle().get(0) + ".epub")
.type("application/epub+zip")
.entity(zipFile)
.build();
// return Response.ok().type("text/plain").entity("Done, see " + workingDir).build();
return RESTUtils
.attachmentResponse(
rootObject.getGeneric().getProvided().getTitle()
.get(0)
+ ".epub").type("application/epub+zip")
.entity(zipFile).build();
// return Response.ok().type("text/plain").entity("Done, see " +
// workingDir).build();
} catch (final ObjectNotFoundFault e) {
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -166,8 +224,6 @@ public Response get(@PathParam("object") final URI uri) {
}
return Response.noContent().build();
}
}
package info.textgrid.services.aggregator;
import info.textgrid.middleware.tgsearch.client.SearchClient;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.MetadataParseFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ObjectNotFoundFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ProtocolNotImplementedFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.TGCrudService;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import javax.ws.rs.WebApplicationException;
public interface ITextGridRep {
......@@ -13,5 +22,8 @@ public interface ITextGridRep {
public String getConfValue(final String key) throws WebApplicationException;
public abstract InputStream getContent(final URI uri)
throws ObjectNotFoundFault, MetadataParseFault, IoFault,
ProtocolNotImplementedFault, AuthFault, IOException;
}
\ No newline at end of file
package info.textgrid.services.aggregator;
import info.textgrid._import.ImportObject;
import info.textgrid._import.RewriteMethod;
import info.textgrid.utils.linkrewriter.ImportMapping;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.Map.Entry;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Maps;
/**
 * Reads the {@code item} elements of an OPF package file and keeps track of
 * those items that are referenced by an absolute URI, together with the
 * media type declared for each of them. From these entries an
 * {@link ImportMapping} is derived that maps each absolute URI to a local
 * file name.
 */
public class OPFManifest {

	/** Namespace URI used to look up the {@code item} elements. */
	public static final String OPF_NS = "http://www.idpf.org/2007/opf";

	/**
	 * Absolute item URIs found in the OPF file, each mapped to the value of
	 * the {@code media-type} attribute of the first item that referenced it.
	 */
	public final ImmutableMap<URI, String> externalItems;

	/** File name extensions (including the dot) for the known media types. */
	public static final ImmutableMap<String, String> EXTENSION = ImmutableMap
			.of("image/jpeg", ".jpg", "image/png", ".png", "text/html", ".html");

	/** Lazily created by {@link #getImportMapping()}. */
	private ImportMapping importMapping;

	/**
	 * Parses the given OPF file and collects all items with an absolute
	 * {@code href}.
	 *
	 * @param opfFile
	 *            the OPF file to parse
	 * @throws SAXException
	 *             if the file cannot be parsed as XML
	 * @throws IOException
	 *             if the file cannot be read
	 * @throws URISyntaxException
	 *             if an item's {@code href} is not a valid URI
	 * @throws IllegalStateException
	 *             if no XML parser could be configured
	 */
	public OPFManifest(final File opfFile) throws SAXException, IOException,
			URISyntaxException {
		final DocumentBuilderFactory factory = DocumentBuilderFactory
				.newInstance();
		factory.setNamespaceAware(true);
		try {
			final DocumentBuilder builder = factory.newDocumentBuilder();
			final Document document = builder.parse(opfFile);
			final NodeList itemList = document.getElementsByTagNameNS(OPF_NS,
					"item");

			final Map<URI, String> map = Maps.newLinkedHashMap();
			for (int i = 0; i < itemList.getLength(); i++) {
				final Element item = (Element) itemList.item(i);
				final URI uri = new URI(item.getAttribute("href"));
				// only absolute references are external; the first
				// occurrence of a URI determines its media type
				if (uri.isAbsolute() && !map.containsKey(uri)) {
					map.put(uri, item.getAttribute("media-type"));
				}
			}
			externalItems = ImmutableSortedMap.copyOf(map);
		} catch (final ParserConfigurationException e) {
			throw new IllegalStateException(e);
		}
	}

	/**
	 * Returns the file name extension registered in {@link #EXTENSION} for
	 * the given media type.
	 *
	 * @param mediaType
	 *            the media type to look up
	 * @return the extension including the leading dot, or the empty string
	 *         if the media type is not registered
	 */
	private static String extensionFor(final String mediaType) {
		final String extension = EXTENSION.get(mediaType);
		return extension == null ? "" : extension;
	}

	/**
	 * Returns the import mapping for all external items, creating it on
	 * first use. Each external URI is mapped to a local file name built from
	 * the URI's scheme specific part plus the extension for its media type.
	 * Items of type {@code application/xhtml+xml} are additionally marked
	 * for XML rewriting.
	 *
	 * @return the (cached) import mapping
	 */
	public ImportMapping getImportMapping() {
		if (importMapping == null) {
			importMapping = new ImportMapping();
			for (final Entry<URI, String> entry : externalItems.entrySet()) {
				final String mediaType = entry.getValue();
				// Media types without a registered extension (e.g.
				// application/xhtml+xml) used to fail with a
				// NullPointerException in String.concat(null); they now
				// simply get no extension.
				final String fileName = entry.getKey().getSchemeSpecificPart()
						.concat(extensionFor(mediaType));
				final ImportObject io = new ImportObject();
				io.setTextgridUri(entry.getKey().toString());
				io.setLocalData(fileName);
				if (mediaType.equals("application/xhtml+xml")) {
					io.setRewriteMethod(RewriteMethod.XML);
					io.setRewriteConfig("internal:html#html");
				}
				importMapping.add(io);
			}
		}
		return importMapping;
	}

	/**
	 * Returns the local file name the import mapping assigns to the given
	 * URI.
	 *
	 * @param uri
	 *            the URI (as string) to look up
	 * @return the local data name recorded for that URI
	 */
	public String getFileName(final String uri) {
		// NOTE(review): behaviour for a URI without a mapping depends on
		// ImportMapping#getImportObjectForTextGridURI -- confirm whether it
		// can return null here.
		return getImportMapping().getImportObjectForTextGridURI(uri).getLocalData();
	}

}
......@@ -3,17 +3,27 @@
import info.textgrid.middleware.confclient.ConfservClient;
import info.textgrid.middleware.confclient.ConfservClientConstants;
import info.textgrid.middleware.tgsearch.client.SearchClient;
import info.textgrid.namespaces.metadata.core._2010.MetadataContainerType;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.AuthFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.IoFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.MetadataParseFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ObjectNotFoundFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.ProtocolNotImplementedFault;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.TGCrudService;
import info.textgrid.namespaces.middleware.tgcrud.services.tgcrudservice.TGCrudService_Service;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import javax.activation.DataHandler;
import javax.ws.rs.WebApplicationException;
import javax.xml.stream.XMLStreamException;
import javax.xml.ws.BindingProvider;
import javax.xml.ws.Holder;
import javax.xml.ws.soap.MTOMFeature;
import org.codehaus.jettison.json.JSONException;
......@@ -57,6 +67,15 @@ public TGCrudService getCRUDService() {
return crud;
}
/**
 * Reads the object identified by the given URI through the CRUD service
 * and returns the input stream of the data handler filled in by that call.
 * The first two arguments of the read call are passed as {@code null}.
 *
 * @param uri
 *            the URI of the object to read
 * @return the input stream obtained from the returned data handler
 */
@Override
public InputStream getContent(final URI uri) throws ObjectNotFoundFault, MetadataParseFault, IoFault, ProtocolNotImplementedFault, AuthFault, IOException {
	final Holder<MetadataContainerType> metadata = new Holder<MetadataContainerType>();
	final Holder<DataHandler> content = new Holder<DataHandler>();
	// the metadata holder is required by the call but not used afterwards
	getCRUDService().read(null, null, uri.toString(), metadata, content);
	final DataHandler handler = content.value;
	return handler.getInputStream();
}
@Override
public SearchClient getPublicSearchClient() {
if (publicSearchClient == null)
......
package info.textgrid.services.aggregator;
import java.net.URI;
import javax.ws.rs.core.Response;
import org.junit.Test;
/**
 * Smoke test for the {@code EPUB} service: it only invokes the export and
 * does not inspect the response.
 */
public class EPUBTest {

	/** Calls {@code EPUB.get} for the URI {@code textgrid:jfst.0}. */
	@Test
	public void testGet() {
		final EPUB service = new EPUB();
		final URI rootObject = URI.create("textgrid:jfst.0");
		final Response result = service.get(rootObject);
	}

}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment