From f8029b55a1c261d371ce3c31fc01862e53f253fe Mon Sep 17 00:00:00 2001 From: mbrodhu <brodhun@sub.uni-goettingen.de> Date: Fri, 29 Jan 2016 10:53:36 +0100 Subject: [PATCH] Test Resumption Token in ListIdentifiers --- oaipmh-core/pom.xml | 4 +- .../middleware/IdentifierListDeliverer.java | 219 +++++++++--------- .../info/textgrid/middleware/OaiPmhTest.java | 2 +- oaipmh-webapp/pom.xml | 4 +- pom.xml | 2 +- 5 files changed, 111 insertions(+), 120 deletions(-) diff --git a/oaipmh-core/pom.xml b/oaipmh-core/pom.xml index 0454decc..61ab43db 100644 --- a/oaipmh-core/pom.xml +++ b/oaipmh-core/pom.xml @@ -4,11 +4,11 @@ <parent> <artifactId>oaipmh</artifactId> <groupId>info.textgrid.middleware</groupId> - <version>1.3.24-SNAPSHOT</version> + <version>1.3.25-SNAPSHOT</version> </parent> <groupId>info.textgrid.middleware</groupId> <artifactId>oaipmh-core</artifactId> - <version>1.3.24-SNAPSHOT</version> + <version>1.3.25-SNAPSHOT</version> <packaging>jar</packaging> <name>TextGrid :: TG-OAI-PMH :: Core</name> <url>http://maven.apache.org</url> diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/IdentifierListDeliverer.java b/oaipmh-core/src/main/java/info/textgrid/middleware/IdentifierListDeliverer.java index ab5daa32..cc6fec3b 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/IdentifierListDeliverer.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/IdentifierListDeliverer.java @@ -5,7 +5,9 @@ package info.textgrid.middleware; import info.textgrid.middleware.oaipmh.HeaderType; import info.textgrid.middleware.oaipmh.ListIdentifiersType; import info.textgrid.middleware.oaipmh.RequestType; +import info.textgrid.middleware.oaipmh.ResumptionTokenType; +import java.math.BigInteger; import java.text.ParseException; import javax.xml.datatype.DatatypeConfigurationException; @@ -13,6 +15,8 @@ import javax.xml.datatype.DatatypeConfigurationException; import org.apache.commons.logging.LogFactory; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; @@ -45,6 +49,10 @@ public class IdentifierListDeliverer { private String dateOfObjectCreation; // Field for the object creation in the repository private String repositoryObjectURIPrefix; // private String identifierField; + long start = 0; + int searchResponseSize = 5; + String resumptionToken; + ResumptionTokenType value = new ResumptionTokenType(); //private String set; @@ -88,7 +96,6 @@ public class IdentifierListDeliverer { return lit; } - /** * To get the required values for the ListIdentifiers request this function * will ask ElasticSearch for a specific textgridUri the values "created" @@ -114,19 +121,91 @@ public class IdentifierListDeliverer { throws ParseException { ListIdentifiersType lit = new ListIdentifiersType(); - String identifier; - QueryBuilder query; - QueryBuilder rangeQuery = QueryBuilders.rangeQuery(rangeField) - .from(from).to(to); + String identifier = ""; + QueryBuilder query = setOrNot(set, from, to); + + SearchRequestBuilder getListIdentifiersValuesBuilder = oaiEsClient + .getOaiESClient().prepareSearch(oaiEsClient.getEsIndex()) + .setSearchType(SearchType.SCAN) + .setScroll(new TimeValue(60000)) + .setTypes(oaiEsClient.getEsType()) + .addFields(identifierListFields) + .setQuery(query) + .setSize(searchResponseSize); + SearchResponse listListIdentiferValues = getListIdentifiersValuesBuilder.execute().actionGet(); - //System.out.println("dariah has value: " + dariah); - //System.out.println("set has value: " + set); + BigInteger bi2 = BigInteger.valueOf(listListIdentiferValues.getHits().getTotalHits()); //Number of Total Hits as BigInteger (required by oaipmh ResumptionToken) + BigInteger bi1 = BigInteger.valueOf(start); //start Value of the list. Important to increase the the cursor value of the resumption token - if(set!=null){ - //ithis.set=set; - //System.out.println("inside set with value: " + set); + //if (listListIdentiferValues.getHits().getTotalHits() > 0) { + while (bi1.floatValue() < bi2.floatValue()) { + listListIdentiferValues =hitHandling(listListIdentiferValues, listListIdentiferValues.getScrollId(), identifier, lit, set); + + listListIdentiferValues = oaiEsClient.getOaiESClient() + .prepareSearchScroll(listListIdentiferValues.getScrollId()) + .setScroll(new TimeValue(60000)) + .execute() + .actionGet(); + + start=start+searchResponseSize; + + + lit.setResumptionToken(setResToken(bi2, bi1, listListIdentiferValues.getScrollId())); + + if (listListIdentiferValues.getHits().getHits().length == 0) { + break; + } + + } + + return lit; + } + + public SearchResponse hitHandling(SearchResponse listFurtherValues, String scrollID, String identifier, ListIdentifiersType lit, String set){ + for (SearchHit hit : listFurtherValues.getHits().getHits()) { + if (this.textgrid && hit.getFields().get(formatField).getValue().toString().equals(formatToFilter)) { + + try { + String datestamp = hit.getFields().get(this.dateOfObjectCreation).getValue().toString(); + datestamp = OAIPMHUtillities.convertDateFormat(datestamp).toXMLFormat(); + + } catch (ParseException e1) { + log.debug(e1); + } catch (DatatypeConfigurationException e1) { + log.debug(e1); + } + identifier = hit.getFields().get(identifierField).getValue().toString(); + lit = setListIdentifierHeader(datestamp, identifier, lit, set); + } + + if (this.dariah) { + datestamp = hit.getFields().get(dateOfObjectCreation).getValue().toString(); + + try { + String datestamp = hit.getFields().get(this.dateOfObjectCreation).getValue().toString(); + datestamp = OAIPMHUtillities.convertDateFormat(datestamp).toXMLFormat(); + } catch (ParseException e1) { + log.debug(e1); + } catch (DatatypeConfigurationException e1) { + log.debug(e1); + } + + identifier = hit.getFields().get(identifierField).getValue().toString(); + lit = setListIdentifierHeader(datestamp, identifier, lit, set); + } + } + return listFurtherValues; + } + + public QueryBuilder setOrNot(String set, String from, String to){ + + QueryBuilder rangeQuery = QueryBuilders.rangeQuery(rangeField).from(from).to(to); + QueryBuilder formatQuery = QueryBuilders.matchPhrasePrefixQuery("format", "text/tg.edition+tg.aggregation+xml"); + QueryBuilder query; + + if(set!=null){ String queryField = ""; String valueField = ""; @@ -135,13 +214,9 @@ public class IdentifierListDeliverer { * COLLECTIONREGISTRY_NAMESPACE is the name of the set Category * but no field in ElasticSearch. It has to be queried in field * "metadata.dc:relation". - */ - + */ queryField = "metadata.dc:relation"; - valueField = set; - //System.out.println("check"); - //System.out.println("valueField: " + valueField); - + valueField = set; } if (this.textgrid == true) { @@ -153,110 +228,26 @@ public class IdentifierListDeliverer { } QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField); - QueryBuilder boolQuery = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery); - query = boolQuery; - }else { - query = rangeQuery; + query = QueryBuilders.boolQuery().must(matchQuery).must(formatQuery); + + } else { + query = QueryBuilders.boolQuery().must(rangeQuery).must(formatQuery); } - SearchRequestBuilder getListIdentifiersValuesBuilder = oaiEsClient - .getOaiESClient().prepareSearch(oaiEsClient.getEsIndex()) - .setTypes(oaiEsClient.getEsType()) - .addFields(identifierListFields).setQuery(query) - .setSize(10000); - - SearchResponse listListIdentiferValues = getListIdentifiersValuesBuilder - .execute().actionGet(); + return query; + } + + public ResumptionTokenType setResToken(BigInteger bi2, BigInteger bi1, String scrollID){ + value.setCompleteListSize(bi2); + value.setCursor(bi1); + resumptionToken = scrollID; + value.setValue(resumptionToken); - if (listListIdentiferValues.getHits().getTotalHits() > 0) { - //System.out.println(listListIdentiferValues.getHits().getTotalHits() + ">" + 0); - for (SearchHit hit : listListIdentiferValues.getHits().getHits()) { - - if (this.textgrid && hit.getFields().get(formatField).getValue() - .toString().equals(formatToFilter)) { - - //System.out.println("YEAH"); - - /*System.out.println("textgrid has value: " + textgrid); - System.out.println("formatField has value: " + formatField); - System.out.println("formatToFilter has value: " + formatToFilter);*/ - - - try { - - String datestamp = hit.getFields().get(this.dateOfObjectCreation).getValue().toString(); - - //System.out.println("datestamp: " + datestamp); - - datestamp = OAIPMHUtillities.convertDateFormat(datestamp).toXMLFormat(); - - //System.out.println("datestamp: " + datestamp); - //System.out.println("got the date in xml format"); - - } catch (ParseException e1) { - log.debug(e1); - } catch (DatatypeConfigurationException e1) { - log.debug(e1); - } - - //System.out.println("identifierFIeld has value: " + identifierField); - - identifier = hit.getFields().get(identifierField).getValue().toString(); - - lit = setListIdentifierHeader(datestamp, identifier, lit, set); - } - - // FIXME @MAX: REMOVE CODE DUPLICATION!!!!!!! - - if (this.dariah) { - System.out.println("start with dariah process"); - System.out.println("dariah ist set to: " + dariah); - datestamp = hit.getFields().get(dateOfObjectCreation) - .getValue().toString(); - - /*try { - // XMLGregorianCalendar convertedDateFormat= - // OAIPMHUtillities.convertDateFormat(datestamp); - System.out.println("check datestamp"); - datestamp = OAIPMHUtillities.convertDateFormat( - datestamp).toXMLFormat(); - System.out.println("check datestamp"); - } catch (DatatypeConfigurationException e) { - log.debug(e); - }*/ - try { - - String datestamp = hit.getFields() - .get(this.dateOfObjectCreation).getValue() - .toString(); - - datestamp = OAIPMHUtillities.convertDateFormat( - datestamp).toXMLFormat(); - } catch (ParseException e1) { - log.debug(e1); - } catch (DatatypeConfigurationException e1) { - log.debug(e1); - } - - // FIXME identifierPrefix to identifierField - - identifier = hit.getFields().get(identifierField) - .getValue().toString(); - System.out.println(identifier); - lit = setListIdentifierHeader(datestamp, identifier, lit, set); - } - } - - } else { - - lit = null; - } - //System.out.println("here we are"); - return lit; + return value; } - + public ListIdentifiersType getLit(ListIdentifiersType lit) { return lit; } diff --git a/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java b/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java index 85c7cc73..e884b5ea 100644 --- a/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java +++ b/oaipmh-core/src/test/java/info/textgrid/middleware/OaiPmhTest.java @@ -270,7 +270,7 @@ public class OaiPmhTest { * @throws ParseException */ @Test - @Ignore + //@Ignore public void testListIdentifierSets() throws ParseException { this.identifierList.setFieldForRange(TGConstants.RANGE_FIELD); this.identifierList.setIdentifierListFields(TGConstants.IDENTIFIER_LIST_FIELDS); diff --git a/oaipmh-webapp/pom.xml b/oaipmh-webapp/pom.xml index 03ebaba9..e2ce7f15 100644 --- a/oaipmh-webapp/pom.xml +++ b/oaipmh-webapp/pom.xml @@ -4,12 +4,12 @@ <parent> <artifactId>oaipmh</artifactId> <groupId>info.textgrid.middleware</groupId> - <version>1.3.24-SNAPSHOT</version> + <version>1.3.25-SNAPSHOT</version> </parent> <groupId>info.textgrid.middleware</groupId> <artifactId>oaipmh-webapp</artifactId> <packaging>war</packaging> - <version>1.3.24-SNAPSHOT</version> + <version>1.3.25-SNAPSHOT</version> <name>TextGrid :: OAI-PMH :: Webapp</name> <url>http://maven.apache.org</url> <profiles> diff --git a/pom.xml b/pom.xml index ead79806..80a633f4 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ <modelVersion>4.0.0</modelVersion> <groupId>info.textgrid.middleware</groupId> <artifactId>oaipmh</artifactId> - <version>1.3.24-SNAPSHOT</version> + <version>1.3.25-SNAPSHOT</version> <packaging>pom</packaging> <name>TextGrid :: TG-OAI-PMH :: Parent</name> <properties> -- GitLab