From 25a35a6d21ec48691078ec5c77183bebfcb7d880 Mon Sep 17 00:00:00 2001
From: Thorsten Vitt <thorsten.vitt@uni-wuerzburg.de>
Date: Mon, 27 May 2013 12:04:24 +0000
Subject: [PATCH] Added speaking (and unambiguous) filenames.

We use a transformed version of the title field plus the URI plus
a content type specific extension. To remove the URI from the file name
we would need to extract the filename policy and keep track of file
names we already used to avoid duplicates.

git-svn-id: https://develop.sub.uni-goettingen.de/repos/textgrid/trunk/services/aggregator@14101 7c539038-3410-0410-b1ec-0f2a7bf1c452
---
 pom.xml                                       |  5 +++
 .../aggregator/tree/AggregationEntry.java     | 10 ++++--
 .../aggregator/tree/FilenamePolicy.java       | 31 +++++++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 src/main/java/info/textgrid/services/aggregator/tree/FilenamePolicy.java

diff --git a/pom.xml b/pom.xml
index 66eec9d..4630dc9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -149,6 +149,11 @@
                         <artifactId>servlet-api</artifactId>
                         <version>2.5</version>
                 </dependency>
+                <dependency>
+                	<groupId>com.ibm.icu</groupId>
+                	<artifactId>icu4j</artifactId>
+                	<version>51.1</version>
+                </dependency>
         </dependencies>
 
 
diff --git a/src/main/java/info/textgrid/services/aggregator/tree/AggregationEntry.java b/src/main/java/info/textgrid/services/aggregator/tree/AggregationEntry.java
index 8c2c396..f414387 100644
--- a/src/main/java/info/textgrid/services/aggregator/tree/AggregationEntry.java
+++ b/src/main/java/info/textgrid/services/aggregator/tree/AggregationEntry.java
@@ -53,9 +53,13 @@ public ObjectType getMetadata() {
 	 *            whether to append a content type specific extension.
 	 */
 	protected String getSimpleFileName(final boolean withExtension) {
-		final String baseName = URI.create(
-				getMetadata().getGeneric().getGenerated().getTextgridUri()
-						.getValue()).getSchemeSpecificPart();
+		final String baseName = 
+				FilenamePolicy.getTransliterator().transliterate(
+						getMetadata().getGeneric().getProvided().getTitle().get(0))
+				.concat(".")
+				.concat(URI.create(
+						getMetadata().getGeneric().getGenerated().getTextgridUri()
+						.getValue()).getSchemeSpecificPart());
 		if (withExtension)
 			return FileExtensionMap.getInstance().addExtension(baseName,
 					getFormat());
diff --git a/src/main/java/info/textgrid/services/aggregator/tree/FilenamePolicy.java b/src/main/java/info/textgrid/services/aggregator/tree/FilenamePolicy.java
new file mode 100644
index 0000000..56c13dc
--- /dev/null
+++ b/src/main/java/info/textgrid/services/aggregator/tree/FilenamePolicy.java
@@ -0,0 +1,31 @@
+package info.textgrid.services.aggregator.tree;
+
+import com.ibm.icu.text.Transliterator;
+
+/** Provides the shared ICU transliterator used to turn titles into file name fragments. */
+public class FilenamePolicy {
+	// ICU rules: romanize, expand German umlauts/sharp s, fold to ASCII, then squash each
+	// run of characters outside [A-Za-z0-9.,;!_-] into '_'. NOTE(review): CR/LF pass through.
+	private static final String TRANSFORM_RULES = "::Any-Latin;\n" + 
+			"{ Ä } [:LowercaseLetter:] > Ae;\n" + 
+			"Ä > AE;\n" + 
+			"{ Ö } [:LowercaseLetter:] > Oe;\n" + 
+			"Ö > OE;\n" + 
+			"{ Ü } [:LowercaseLetter:] > Ue;\n" + 
+			"Ü > UE;\n" + 
+			"ä > ae;\n" + 
+			"ö > oe;\n" + 
+			"ü > ue;\n" + 
+			"ſ > s;\n" + 
+			"ß > ss;\n" + 
+			"::Latin-ASCII;\n" + 
+			"[^A-Za-z0-9.,;!\\_\\n\\r-]+ > \\_";
+	// Built once during class initialization, so concurrent first callers cannot race.
+	private static final Transliterator TRANSLITERATOR = Transliterator
+			.createFromRules("TgFilenames", TRANSFORM_RULES, Transliterator.FORWARD);
+
+	/** @return the single transliterator instance compiled from the filename rules */
+	public static Transliterator getTransliterator() {
+		return TRANSLITERATOR;
+	}
+}
-- 
GitLab