diff --git a/oaipmh-core/pom.xml b/oaipmh-core/pom.xml index c097bb2deda971e84fcc5c23c564c60d6d2c262a..cf6a0b4c8bd99b9184a5945c47414660598bd38e 100644 --- a/oaipmh-core/pom.xml +++ b/oaipmh-core/pom.xml @@ -12,12 +12,28 @@ <packaging>jar</packaging> <name>DARIAHDE :: OAI-PMH DataProvider :: Core</name> <url>http://maven.apache.org</url> + <dependencies> + <dependency> + <groupId>info.textgrid.utils</groupId> + <artifactId>httpclients</artifactId> + <version>${textgrid.httpclients.version}</version> + </dependency> + <dependency> + <groupId>org.eclipse.rdf4j</groupId> + <artifactId>rdf4j-repository-api</artifactId> + <version>3.0.2</version> +</dependency> <dependency> <groupId>info.textgrid.middleware</groupId> <artifactId>crud-common</artifactId> <version>${tgcrud.version}</version> </dependency> + <dependency> + <groupId>info.textgrid.middleware.clients</groupId> + <artifactId>textgrid-clients</artifactId> + <version>3.2.5</version> + </dependency> <dependency> <groupId>org.apache.cxf</groupId> <artifactId>cxf-rt-rs-security-cors</artifactId> diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/DublinCoreFieldLoader.java b/oaipmh-core/src/main/java/info/textgrid/middleware/DublinCoreFieldLoader.java index 87a31679e54e852d4129d85c4ff38632685ea101..2451272a1a377ab6236c444c9934c7b450daf6e3 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/DublinCoreFieldLoader.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/DublinCoreFieldLoader.java @@ -4,7 +4,6 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; import javax.xml.datatype.DatatypeConfigurationException; import org.apache.commons.logging.LogFactory; import org.elasticsearch.action.get.GetResponse; @@ -159,7 +158,7 @@ public class DublinCoreFieldLoader { public static List<String> fillList(SearchHit hit, String[] fields) { List<String> list = new ArrayList<String>(); - + if (fields 
!= null) { for (String field : fields) { if (hit.getSourceAsMap().get(field) == null) { diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHImpl.java b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHImpl.java index 8ffb5fb07a00a068ec57d6ea332ff19b6326d994..d6f659e40d30d905265fded36cf2a678667e92e9 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHImpl.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHImpl.java @@ -276,7 +276,7 @@ public class OAIPMHImpl implements OAIPMHProducer { // If metadataFormat IS SET, set recordListDeliverer accordingly. IdentifierListDelivererInterface idListDeliv = null; if (request.getMetadataPrefix() != null) { - if (request.getMetadataPrefix().equals(OAIPMHUtilities.OAIDC_PREFIX)) { + if (request.getMetadataPrefix().equals(TGConstants.METADATA_DC_PREFIX)) { idListDeliv = this.identifierListDC; } else { idListDeliv = this.identifierListIDIOM; @@ -298,23 +298,11 @@ public class OAIPMHImpl implements OAIPMHProducer { } } - ListIdentifiersType listIdentifiers = null; - try { - listIdentifiers = idListDeliv.processIdentifierList(request.getFrom(), request.getUntil(), - request.getSet(), request.getResumptionToken()); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + ListIdentifiersType listIdentifiers = idListDeliv.processIdentifierList(request.getFrom(), + request.getUntil(), request.getSet(), request.getResumptionToken()); if (listIdentifiers != null) { - if (this.identifierListDC.getResultSize() == 0) { - requestErrors.setError("RecordMatchError", - "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list."); - oaipmhRoot.getError().add(requestErrors.getError()); - } else { - oaipmhRoot.setListIdentifiers(listIdentifiers); - } + oaipmhRoot.setListIdentifiers(listIdentifiers); } else { ErrorHandler idError = new ErrorHandler(); idError.setError(TGConstants.OAI_NO_RECORD_MATCH, 
"The value of the identifier: " @@ -416,7 +404,7 @@ public class OAIPMHImpl implements OAIPMHProducer { // If metadataFormat IS SET, set recordListDeliverer accordingly. RecordListDelivererInterface recListDeliv = null; if (request.getMetadataPrefix() != null) { - if (request.getMetadataPrefix().equals(OAIPMHUtilities.OAIDC_PREFIX)) { + if (request.getMetadataPrefix().equals(TGConstants.METADATA_DC_PREFIX)) { recListDeliv = this.recordListDC; } else { recListDeliv = this.recordListIDIOM; @@ -441,13 +429,7 @@ public class OAIPMHImpl implements OAIPMHProducer { request.getSet(), request.getResumptionToken()); if (listRecords != null) { - if (this.recordListDC.getResultSize() == 0) { - requestErrors.setError("RecordMatchError", - "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list."); - oaipmhRoot.getError().add(requestErrors.getError()); - } else { - oaipmhRoot.setListRecords(listRecords); - } + oaipmhRoot.setListRecords(listRecords); } } @@ -711,4 +693,4 @@ public class OAIPMHImpl implements OAIPMHProducer { return result; } -} +} \ No newline at end of file diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHProducer.java b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHProducer.java index 77d71174979a2f2d8d61427441f1336c57c09fda..38f03be698493309be1aedb729ac63b524874bd2 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHProducer.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHProducer.java @@ -1,5 +1,7 @@ package info.textgrid.middleware; +import java.io.IOException; + import javax.ws.rs.DefaultValue; import javax.ws.rs.FormParam; import javax.ws.rs.GET; @@ -40,6 +42,7 @@ public interface OAIPMHProducer { * @param until - End value to filter the response for a specific interval * @param resumptionToken - Indicates how many value will be send back in the response * @return OAIPMHType object containing the whole response + * @throws IOException 
*/ @GET @Path("/") @@ -50,7 +53,7 @@ public interface OAIPMHProducer { @QueryParam("set") @DefaultValue("") String set, @QueryParam("from") @DefaultValue("") String from, @QueryParam("until") @DefaultValue("") String until, - @QueryParam("resumptionToken") @DefaultValue("") String resumptionToken); + @QueryParam("resumptionToken") @DefaultValue("") String resumptionToken) throws IOException; /** * <p> @@ -72,6 +75,7 @@ public interface OAIPMHProducer { * @param until - End value to filter the response for a specific interval * @param resumptionToken - Indicates how many value will be send back in the response * @return OAIPMHType object containing the whole response + * @throws IOException */ @POST @Path("/") @@ -82,7 +86,7 @@ public interface OAIPMHProducer { @FormParam("set") @DefaultValue("") String set, @FormParam("from") @DefaultValue("") String from, @FormParam("until") @DefaultValue("") String until, - @FormParam("resumptionToken") @DefaultValue("") String resumptionToken); + @FormParam("resumptionToken") @DefaultValue("") String resumptionToken) throws IOException; /** * @return diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java index 9f1b13f8ab24f8e9109a18f1f0e0c96df04f1591..69ce9223b32ba649d21410c5d83c109b45e28c1d 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/OAIPMHUtilities.java @@ -1,19 +1,33 @@ package info.textgrid.middleware; +import java.io.IOException; +import java.io.InputStream; import java.math.BigInteger; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.GregorianCalendar; +import java.util.List; import java.util.Map; import java.util.TimeZone; import javax.xml.datatype.DatatypeConfigurationException; import javax.xml.datatype.DatatypeFactory; import 
javax.xml.datatype.XMLGregorianCalendar; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.LogFactory; + +import info.textgrid.clients.AuthClient; +import info.textgrid.clients.tgauth.AuthClientException; import info.textgrid.middleware.oaipmh.ListMetadataFormatsType; import info.textgrid.middleware.oaipmh.MetadataFormatType; import info.textgrid.middleware.oaipmh.ResumptionTokenType; +import info.textgrid.namespaces.middleware.tgauth.ProjectInfo; /** * <p> @@ -67,6 +81,73 @@ public class OAIPMHUtilities { return tgDublinCore; }
+  /**
+   * <p>
+   * Fetches the infos of all projects with a single {@code getAllProjects()} call on a new AuthClient.
+   * </p>
+   *
+   * @return The project infos exactly as delivered by the AuthClient
+   * @throws AuthClientException If the AuthClient call fails
+   */
+  public static List<ProjectInfo> getProjectList() throws AuthClientException {
+    return new AuthClient().getAllProjects();
+  }
+
+  /**
+   * <p>
+   * Looks up the name of one project using a new AuthClient.
+   * </p>
+   *
+   * @param projectID The ID of the project to look up
+   * @return The name taken from the project info of the given ID
+   * @throws AuthClientException If the AuthClient call fails
+   */
+  public static String getProjectName(String projectID) throws AuthClientException {
+    return new AuthClient().getProjectInfo(projectID).getName();
+  }
+
+  /**
+   * <p>
+   * Collects the text of every <code>uri</code> element found in a SPARQL XML result stream.
+   * </p>
+   *
+   * <p>
+   * The stream goes to the StAX parser untouched, reading it beforehand would leave nothing to parse.
+   * Parse errors are logged, the URIs gathered up to that point are still returned.
+   * </p>
+   *
+   * @param in The SPARQL XML result stream, closing it is left to the caller
+   * @return The URIs in document order, an empty list if there are none
+   */
+  public static List<String> urisFromSparqlResponse(InputStream in) {
+
+    List<String> urilist = new ArrayList<String>();
+
+    XMLStreamReader r = null;
+    try {
+      r = XMLInputFactory.newInstance().createXMLStreamReader(in);
+      while (r.hasNext()) {
+        if (r.next() == XMLStreamConstants.START_ELEMENT && r.getLocalName().equals("uri")) {
+          urilist.add(r.getElementText());
+        }
+      }
+    } catch (XMLStreamException e) {
+      log.error("error parsing sparql-result-stream", e);
+    } finally {
+      // XMLStreamReader is not AutoCloseable, so release the parser by hand.
+      if (r != null) {
+        try {
+          r.close();
+        } catch (XMLStreamException e) {
+          log.error("error closing sparql-result-stream reader", e);
+        }
+      }
+    }
+
+    return urilist;
+  }
+
+
/** * <p> * Producing the list of all metadata formats. diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/OaiPmhClient.java b/oaipmh-core/src/main/java/info/textgrid/middleware/OaiPmhClient.java index 299b78fb99c2eeb6909d50c44ef2d315501e9b1f..f5bcfd8a6390756d37654ffe9bd6fd73011da309 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/OaiPmhClient.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/OaiPmhClient.java @@ -1,5 +1,6 @@ package info.textgrid.middleware; +import java.io.IOException; import java.text.ParseException; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -31,8 +32,9 @@ public class OaiPmhClient { * @param verb * @return * @throws ParseException + * @throws IOException */ - public String request(String verb) throws ParseException { + public String request(String verb) throws ParseException, IOException { return this.producer.getRequest(verb, this.identifier, this.metadataPrefix, this.set, this.from, this.until, this.resumptionToken); } @@ -48,4 +50,4 @@ public class OaiPmhClient { + OaipmhServiceVersion.BUILDDATE; } -} +} \ No newline at end of file diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/SetDeliverer.java b/oaipmh-core/src/main/java/info/textgrid/middleware/SetDeliverer.java index 57d67a256d0ea05a1176c50fabae5eb8fc6ef2d4..78c0a67019565c4400927c683da4b519c406e779 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/SetDeliverer.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/SetDeliverer.java @@ -3,32 +3,40 @@ package info.textgrid.middleware; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; -import java.util.Hashtable; -import java.util.Iterator; +import java.util.ArrayList; import java.util.LinkedHashSet; -import java.util.Map; +import java.util.List; +import
java.util.Map.Entry; import java.util.Set; + import org.elasticsearch.action.get.GetRequest; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.common.Strings; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.script.Script; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; + +import info.textgrid.clients.tgauth.AuthClientException; import info.textgrid.middleware.oaipmh.ListSetsType; import info.textgrid.middleware.oaipmh.SetType; -//import info.textgrid.namespaces.middleware.tgcrud.common.TextGridMimetypes; + /** * */ public class SetDeliverer { - private static Map<String, String> setSet = new Hashtable<String, String>(); + //private static Map<String, String> setSet = new Hashtable<String, String>(); private Set<String> identifier = new LinkedHashSet<String>(); private String formatField; @@ -52,52 +60,83 @@ public class SetDeliverer { /** * @return + * @throws AuthClientException + * @throws IOException */ public ListSetsType setListBuilder() { ListSetsType setList = new ListSetsType(); //TODO looking for DARIAH if DARIAH works on ES6 - //QueryBuilder aggQuery; + SearchRequest request = new SearchRequest(OAI_ESClient.getEsIndex()); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); AggregationBuilder agg = null; - if (this.textgrid) { - agg = AggregationBuilders - 
.terms("project.value.untouched") - .field("project.value.untouched") - .size(10000); - } + AggregationBuilder filterPublicProjects = null; + AggregationBuilder projectNameAndID = null; + + + if (this.textgrid) { + Script mergeProjectIDandProjectName = new Script("doc['project.id'].value + '&' + doc['project.value.untouched'].value"); + + filterPublicProjects = AggregationBuilders.filter("projectsPublic", QueryBuilders.boolQuery() + .mustNot(QueryBuilders.existsQuery("nearlyPublished"))); + + projectNameAndID = AggregationBuilders.terms("projects") + .script(mergeProjectIDandProjectName) + .size(1000); + } + if (this.dariah){ agg = AggregationBuilders .terms("descriptiveMetadata.dc:format") .field("descriptiveMetadata.dc:format") .size(10000); - // aggQuery = QueryBuilders.matchPhraseQuery("descriptiveMetadata.dc:format", - // TextGridMimetypes.DARIAH_COLLECTION); - } + + List<String> fieldsForSetRequest = new ArrayList<String>(); + fieldsForSetRequest.add("project.value"); + fieldsForSetRequest.add("project.id"); + + searchSourceBuilder.size(0); + searchSourceBuilder.aggregation(filterPublicProjects); + searchSourceBuilder.aggregation(projectNameAndID); - searchSourceBuilder.aggregation(agg); - searchSourceBuilder.size(10000); request.source(searchSourceBuilder); + SearchResponse getRecordListItems = null; try { - getRecordListItems = OAI_ESClient.getEsClient().search(request, RequestOptions.DEFAULT); + getRecordListItems = OAI_ESClient.getEsClient().search(request, RequestOptions.DEFAULT); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); - } - - // SearchRequestBuilder request = OAI_ESClient.getOaiESClient() - // .prepareSearch(OAI_ESClient.getEsIndex()).setTypes(OAI_ESClient.getEsType()) - // .setQuery(aggQuery).addField(this.formatField).addField(this.identifierField) - // .addField("project.value").addField("project.id").setSize(100000); - // SearchResponse getRecordListItems = request.execute().actionGet(); + } + if 
(this.textgrid == true && getRecordListItems.getAggregations()!=null) { + + String projectName=""; + + for (Entry<String, Aggregation> entry : getRecordListItems.getAggregations().asMap().entrySet()) { + String name = entry.getKey(); + + if (name.equals("projects")) { + Terms a = (Terms) entry.getValue(); + for (Bucket bentry : a.getBuckets()) { + projectName = bentry.getKey().toString(); + String [] projectInfos = projectName.split("&"); + + SetType setForsTextGrid = new SetType(); + setForsTextGrid.setSetSpec(projectInfos[0]); + setForsTextGrid.setSetName(projectInfos[1]); + setList.getSet().add(setForsTextGrid); + } + } + + } + for (SearchHit hit : getRecordListItems.getHits().getHits()) { - + if (this.dariah == true && hit.getSourceAsMap().get(this.identifierField) .toString().startsWith("hdl:")) { @@ -105,32 +144,10 @@ public class SetDeliverer { this.identifier.add(pid); } - - if (this.textgrid == true) { - String[] projectFields = new String[] {"project.value", "project.id"}; - String projectName = DublinCoreFieldLoader.fillList(hit, projectFields).get(0); - String projectID = DublinCoreFieldLoader.fillList(hit, projectFields).get(1); - - // FIXME Why is that set spec thing commented out?? - // String projectSetSpec = projectName.concat(":").concat(projectID); - // FIXME setSet remained from develop branch. 
- // SetDeliverer.setSet.put(projectID, projectName); - } - } - - if (this.textgrid == true) { - - Iterator<?> it = setSet.entrySet().iterator(); - while (it.hasNext()) { - @SuppressWarnings("rawtypes") - Map.Entry pair = (Map.Entry) it.next(); - SetType set = new SetType(); - set.setSetName(pair.getValue().toString()); - set.setSetSpec("project:" + pair.getKey().toString()); - setList.getSet().add(set); + } } - + if (this.dariah == true) { for (String identifierSetSpec : this.identifier) { String id = identifierSetSpec; @@ -290,4 +307,4 @@ public class SetDeliverer { this.dariah = dariah; } -} +} \ No newline at end of file diff --git a/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java b/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java index ccae2d37c7276f56cf385e7e5940a3e7aa219a3f..4e9448547fc2ba2ad3706b10999661764e131aff 100644 --- a/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java +++ b/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java @@ -487,4 +487,4 @@ public class OaiPmhTest { System.out.println("-----------------------------------\n"); } -} +} \ No newline at end of file