From 513407a1d15e7ee27fa854d57e892c08e6bdaa13 Mon Sep 17 00:00:00 2001 From: "Stefan E. Funk" <funk@sub.uni-goettingen.de> Date: Mon, 19 Jul 2021 15:41:01 +0200 Subject: [PATCH] Add "%2F" instead of "_" to JSON file names --- .../textgrid/middleware/OAIPMHUtilities.java | 3 + .../middleware/RecordDelivererDATACITE.java | 223 ++++++++++++++---- .../webapp/WEB-INF/oaipmh.dariah.properties | 4 +- pom.xml | 2 +- 4 files changed, 182 insertions(+), 50 deletions(-) diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java index 952dab03..e3f822b0 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java @@ -236,6 +236,9 @@ public class OAIPMHUtilities { public static String datestampAsString(String originalDateTimeString) throws ParseException { SimpleDateFormat tgItemTime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.S"); + + System.out.println("DATE? 
" + originalDateTimeString); + Date date = tgItemTime.parse(originalDateTimeString); SimpleDateFormat outFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); outFormatter.setTimeZone(TimeZone.getTimeZone("UTC")); diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordDelivererDATACITE.java b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordDelivererDATACITE.java index b1b100e0..d8fc2afe 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordDelivererDATACITE.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordDelivererDATACITE.java @@ -11,6 +11,7 @@ import org.apache.commons.logging.LogFactory; import org.elasticsearch.common.Strings; import org.json.JSONObject; import org.springframework.stereotype.Component; +import info.textgrid.middleware.common.TextGridMimetypes; import info.textgrid.middleware.oaipmh.ContributorType; import info.textgrid.middleware.oaipmh.DateType; import info.textgrid.middleware.oaipmh.GetRecordType; @@ -46,7 +47,6 @@ import info.textgrid.middleware.oaipmh.Resource.Subjects; import info.textgrid.middleware.oaipmh.Resource.Subjects.Subject; import info.textgrid.middleware.oaipmh.Resource.Titles; import info.textgrid.middleware.oaipmh.Resource.Titles.Title; -import info.textgrid.middleware.oaipmh.TitleType; /** * @author Maximilian Brodhun, SUB Göttingen @@ -118,6 +118,11 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { Strings.EMPTY_ARRAY) .getSource()); + if (this.jsonObj == null || this.jsonObj.isEmpty()) { + throw new IOException( + "No data could be retrieved from ElasticSearch for ID: " + idInElasticSearchIndex); + } + System.out.println("jsonObject: " + this.jsonObj); // Set response header. 
@@ -204,14 +209,14 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { this.resource.setRelatedIdentifiers(this.addRelatedIdentifiers()); this.resource.setIdentifier(this.addIdentifier()); this.resource.setCreators(this.addCreators()); - this.addGeoLocation(); + // FIXME Check if we can have empty elements here! Was: Check for null in this.addGeoLocation() + // method!) + this.resource.setGeoLocations(this.addGeoLocation()); this.resource.setResourceType(addResourceType()); this.resource.setVersion(this.addVersion()); - - if (addSubjects().getSubject().get(0).getValue() != null) { - this.resource.setSubjects(this.addSubjects()); // TODO what if subjects == null? - } - + // FIXME Check if we can have empty elements here! Was: Check for null in this.addSubjects() + // method!) + this.resource.setSubjects(this.addSubjects()); this.resource.setSizes(this.addSize()); return this.resource; @@ -241,19 +246,43 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { Subjects subjects = new Subjects(); - for (String subjectField : this.oarSubjectFields) { - Subject subject = new Subject(); - subject.setValue(OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".value")); - subject.setSchemeURI(OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".id.type")); - subject - .setSubjectScheme(OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".id.value")); - subjects.getSubject().add(subject); + // ** + // TextGrid + // ** + + if (this.textgrid) { + for (String subjectField : this.oarSubjectFields) { + Subject subject = new Subject(); + subject.setValue(OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".value")); + subject.setSchemeURI(OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".id.type")); + subject.setSubjectScheme( + OAIPMHUtilities.fieldLoader(this.jsonObj, subjectField + ".id.value")); + subjects.getSubject().add(subject); + } } - if (subjects.getSubject().get(0) != null) { - return subjects; - } 
else { - return null; + + // ** + // DARIAH + // ** + + else if (this.dariah) { + for (String subjectField : this.oarSubjectFields) { + List<String> sList = OAIPMHUtilities.fieldLoader(this.jsonObj, new String[] {subjectField}); + for (String s : sList) { + Subject subject = new Subject(); + + log.debug("subject: " + s); + + subject.setValue(s); + // TODO Extract subject schema and schema URI somehow? + // subject.setSchemeURI(); + // subject.setSubjectScheme(); + subjects.getSubject().add(subject); + } + } } + + return subjects; } /** @@ -273,12 +302,73 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { /** * @return */ - private static ResourceType addResourceType() { + private ResourceType addResourceType() { ResourceType resourceType = new ResourceType(); - resourceType.setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.DATASET); - // FIXME What about collection? Could editions be seen as collection datasets? + // ** + // TextGrid + // ** + + if (this.textgrid) { + for (String format : this.oarFormatFields) { + String resourceValue = OAIPMHUtilities.fieldLoader(this.jsonObj, format); + // TODO Set resource value here? + resourceType.setValue(resourceValue); + // Set collection for all aggregation types, take also into account: images and text. + if (TextGridMimetypes.AGGREGATION_SET.contains(format)) { + resourceType + .setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.COLLECTION); + } else if (TextGridMimetypes.IMAGE_SET.contains(format)) { + resourceType + .setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.IMAGE); + } else if (TextGridMimetypes.ORIGINAL_SET.contains(format)) { + resourceType + .setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.TEXT); + } + // Use dataset for everything else. + else { + resourceType.setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.DATASET); + } + // We take only the first of all the resource types! 
+ if (resourceType.getValue() != null && !resourceType.getValue().isEmpty()) { + break; + } + } + } + + // ** + // DARIAH + // ** + + else if (this.dariah) { + + System.out.println(" ## oarFormatFields: " + this.oarFormatFields.length); + + for (String format : this.oarFormatFields) { + // Only the first value is taken here! We do not need to create a list! + // TODO Please see addLanguage(), maybe do it as done there to use the other fieldLoader() + // method! + String resourceValue = OAIPMHUtilities.fieldLoader(this.jsonObj, format); + + System.out.println(" ## resourceType: " + format + " --> " + resourceValue); + + resourceType.setValue(resourceValue); + // Set collection for DARIAH collection type here, data object for everything else. + if (TextGridMimetypes.DARIAH_COLLECTION.equals(format)) { + resourceType + .setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.COLLECTION); + } + // Use dataset for everything else. + else { + resourceType.setResourceTypeGeneral(info.textgrid.middleware.oaipmh.ResourceType.DATASET); + } + // We take only the first of all the resource types! 
+ if (resourceType.getValue() != null && !resourceType.getValue().isEmpty()) { + break; + } + } + } return resourceType; } @@ -286,7 +376,7 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { /** * */ - private void addGeoLocation() { + private GeoLocations addGeoLocation() { GeoLocations geoLocations = new GeoLocations(); @@ -296,11 +386,12 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { OAIPMHUtilities.fieldLoader(this.jsonObj, geoLocationField + ".value")); geoLocations.getGeoLocation().add(geoLocation); } - if (geoLocations.getGeoLocation().get(0).getGeoLocationPlace() != null) { - this.resource.setGeoLocations(geoLocations); - } - // return geoLocations; + // if (geoLocations.getGeoLocation().get(0).getGeoLocationPlace() != null) { + // this.resource.setGeoLocations(geoLocations); + // } + + return geoLocations; } /** @@ -313,7 +404,7 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { for (String relatedIdentifierField : this.oarRelatedIdentifierFields) { - System.out.println("relatedIdentifierField: " + relatedIdentifierField); + log.debug("relatedIdentifierField: " + relatedIdentifierField); // ** // TextGrid @@ -350,7 +441,7 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { OAIPMHUtilities.fieldLoader(this.jsonObj, new String[] {relatedIdentifierField}); for (String i : relatedIdentiferList) { - System.out.println("i: " + i); + log.debug("i: " + i); RelatedIdentifier relatedID = new RelatedIdentifier(); relatedID.setValue(i); @@ -360,13 +451,15 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { relatedID.setRelatedIdentifierType(RelatedIdentifierType.DOI); } else if (i.startsWith("http")) { relatedID.setRelatedIdentifierType(RelatedIdentifierType.URL); - } else { - // TODO Check other prefixes and add other values, too? - // TODO No OTHER value existing! What do we do here, if none of the above apply? 
} - // TODO We have to check relation type, too? Firstly we take part of a collection - // here(that's what we put in while publishing). - relatedID.setRelationType(RelationType.IS_PART_OF); + // TODO Check other prefixes and add other values, too? + else { + // TODO No type OTHER existing! Value is mandatory, so we chose URL, what else can we + // do? + relatedID.setRelatedIdentifierType(RelatedIdentifierType.URL); + } + // Relation type is REFERENCES for the time being (coming from dc:relation at the moment). + relatedID.setRelationType(RelationType.REFERENCES); relatedIdentifiers.getRelatedIdentifier().add(relatedID); } } @@ -446,16 +539,48 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { RightsList rightsList = new RightsList(); - for (String rightsField : this.oarRightsFields) { - Rights rights = new Rights(); - rights.setRightsURI(OAIPMHUtilities.fieldLoader(this.jsonObj, rightsField + ".licenseUri")); - rights.setValue(OAIPMHUtilities.fieldLoader(this.jsonObj, rightsField + ".value")); - rightsList.getRights().add(rights); + // ** + // Textgrid + // ** + + if (this.textgrid) { + for (String rightsField : this.oarRightsFields) { + Rights rights = new Rights(); + rights.setRightsURI(OAIPMHUtilities.fieldLoader(this.jsonObj, rightsField + ".licenseUri")); + rights.setValue(OAIPMHUtilities.fieldLoader(this.jsonObj, rightsField + ".value")); + rightsList.getRights().add(rights); + } + if (rightsList.getRights().get(0).getValue() != null) { + Rights openAccesRight = new Rights(); + openAccesRight.setRightsURI("info:eu-repo/semantics/openAccess"); + rightsList.getRights().add(openAccesRight); + } } - if (rightsList.getRights().get(0).getValue() != null) { - Rights openAccesRight = new Rights(); - openAccesRight.setRightsURI("info:eu-repo/semantics/openAccess"); - rightsList.getRights().add(openAccesRight); + + // ** + // DARIAH + // ** + + else if (this.dariah) { + for (String rightsField : this.oarRightsFields) { + Rights rights = new 
Rights(); + + List<String> rList = OAIPMHUtilities.fieldLoader(this.jsonObj, new String[] {rightsField}); + + for (String r : rList) { + + log.debug("right: " + r); + + // Set rights URI assuming every "http://" or "https://" really IS a URI. + if (r.startsWith("http")) { + rights.setRightsURI(r); + } + // Set value in every case, even if doubled with URI. + rights.setValue(r); + } + + rightsList.getRights().add(rights); + } } return rightsList; @@ -528,10 +653,13 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { String language = null; - // TODO Use only ONE language here? What if we have more then one in DARIAH DC data? - + // TODO Only ONE language is permitted in Datacite schema? What if we have more than one in + // DARIAH DC data? Take only first one for the moment! for (String languageField : this.oarLanguageFields) { - language = OAIPMHUtilities.fieldLoader(this.jsonObj, languageField); + List<String> langs = OAIPMHUtilities.fieldLoader(this.jsonObj, new String[] {languageField}); + if (!langs.isEmpty()) { + language = langs.get(0); + } } return language; @@ -735,7 +863,8 @@ public class RecordDelivererDATACITE extends RecordDelivererAbstract { for (String title : titleList) { Title tileInOpenAireRecord = new Title(); tileInOpenAireRecord.setValue(title); - tileInOpenAireRecord.setTitleType(TitleType.OTHER); + // Title type OTHER is not permitted, leave type out for the moment! 
+ // tileInOpenAireRecord.setTitleType(TitleType.OTHER); titles.getTitle().add(tileInOpenAireRecord); } } diff --git a/oaipmh-webapp/src/main/webapp/WEB-INF/oaipmh.dariah.properties b/oaipmh-webapp/src/main/webapp/WEB-INF/oaipmh.dariah.properties index 0240a8d9..f9d99173 100644 --- a/oaipmh-webapp/src/main/webapp/WEB-INF/oaipmh.dariah.properties +++ b/oaipmh-webapp/src/main/webapp/WEB-INF/oaipmh.dariah.properties @@ -43,7 +43,7 @@ metadataFormatListInstance = MetadataFormatListDelivererDH ## Field Settings ## ###################### -fields = administrativeMetadata.dcterms:modified, administrativeMetadata.dcterms:created, administrativeMetadata.dcterms:identifier, administrativeMetadata.dcterms:extent, administrativeMetadata.dcterms:relation, descriptiveMetadata.dc:contributor, descriptiveMetadata.dc:coverage, descriptiveMetadata.dc:creator, descriptiveMetadata.dc:date, descriptiveMetadata.dc:description, descriptiveMetadata.dc:format, descriptiveMetadata.dc:identifier, descriptiveMetadata.dc:language, descriptiveMetadata.dc:publisher, descriptiveMetadata.dc:relation, descriptiveMetadata.dc:rights, descriptiveMetadata.dc:source, descriptiveMetadata.dc:subject, descriptiveMetadata.dc:title, descriptiveMetadata.dc:type, descriptiveMetadata.dc:relation +fields = administrativeMetadata.dcterms:modified, administrativeMetadata.dcterms:created, administrativeMetadata.dcterms:identifier, administrativeMetadata.dcterms:extent, administrativeMetadata.dcterms:relation, administrativeMetadata.dcterms:format, descriptiveMetadata.dc:contributor, descriptiveMetadata.dc:coverage, descriptiveMetadata.dc:creator, descriptiveMetadata.dc:date, descriptiveMetadata.dc:description, descriptiveMetadata.dc:format, descriptiveMetadata.dc:identifier, descriptiveMetadata.dc:language, descriptiveMetadata.dc:publisher, descriptiveMetadata.dc:relation, descriptiveMetadata.dc:rights, descriptiveMetadata.dc:source, descriptiveMetadata.dc:subject, descriptiveMetadata.dc:title, 
descriptiveMetadata.dc:type, descriptiveMetadata.dc:relation workFields = descriptiveMetadata.dc:title ###################### @@ -66,7 +66,7 @@ oar.geoLocationFields = descriptiveMetadata.dc:coverage oar.handle = administrativeMetadata.dcterms:identifier oar.versionFields = oar.subjectFields = descriptiveMetadata.dc:subject -oar.sizeField = administrativeMetadata.dcterms:extent +oar.sizeField = administrativeMetadata.dcterms:extent ########################## ## Dublin Core Fields ## diff --git a/pom.xml b/pom.xml index 27a0f462..49ebb55d 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ <packaging>pom</packaging> <name>DARIAHDE :: OAI-PMH DataProvider</name> <properties> - <common.version>3.7.0-ES6-SNAPSHOT</common.version> + <common.version>3.9.0-ES6-SNAPSHOT</common.version> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <cxf.version>3.2.8</cxf.version> <spring.version>4.0.2.RELEASE</spring.version> -- GitLab