From af8c3db2db19bb828c8014ee043d9e75d47afa72 Mon Sep 17 00:00:00 2001 From: "Stefan E. Funk" <funk@sub.uni-goettingen.de> Date: Wed, 21 Jul 2021 18:25:40 +0200 Subject: [PATCH] IDs gotten from ES in DH, solving fields issue now! --- .../RecordListDelivererAbstract.java | 111 +++++++----------- .../RecordListDelivererDATACITE.java | 11 +- 2 files changed, 47 insertions(+), 75 deletions(-) diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java index e35dcf6d..4219d23c 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java @@ -76,67 +76,11 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer QueryBuilder query; - // FIXME Use values from config! - - // FIXME Unite with method getUriListDARIAH()! - - // FIXME Avoid useless logging! - - System.out.println("RANGEFIELD: " + "created"); - - QueryBuilder rangeQuery = QueryBuilders.rangeQuery("created").from(from).to(to); - QueryBuilder filterSandBox = QueryBuilders.matchPhraseQuery("nearlyPublished", "true"); - - if (set != null && !set.equals("openaire_data")) { - String[] setParts = set.split(":"); - - String queryField = ""; - String valueField = ""; - - if (setParts[0].equals(TGConstants.SET_FIELD_FOR_TEXTGRID)) { - queryField = TGConstants.PROJECT_ID_FOR_TEXTGRID; - valueField = setParts[1]; - } - - // FIXME Unite with query from class SetListDeliverer! - - QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField); - QueryBuilder boolQuery = - QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery).mustNot(filterSandBox); - - query = boolQuery; - } else { - // query = rangeQuery; - query = QueryBuilders.boolQuery().must(rangeQuery).mustNot(filterSandBox); - } - - System.out.println(" ## QUERY:\n" + query); - - result = getFieldsFromESIndex(query, resumptionToken, set); - - System.out.println(" ## RESULT: " + result); - - return result; - } - - /** - * @param from - * @param to - * @param set - * @param resumptionToken - * @return - */ - public List<String> getUriListDARIAH(String from, String to, String set, String resumptionToken) { - - List<String> result = new ArrayList<String>(); - - QueryBuilder query; - System.out.println(" ## RANGEFIELD: " + this.dateOfObjectCreation); QueryBuilder rangeQuery = QueryBuilders.rangeQuery(this.dateOfObjectCreation).from(from).to(to); + QueryBuilder filterSandBox = QueryBuilders.matchPhraseQuery("nearlyPublished", "true"); - // TODO We must set the set's range as in SetListDeliverer using query scripts! if (set != null && !set.equals("openaire_data")) { String[] setParts = set.split(":"); String queryField = setParts[0]; @@ -145,12 +89,27 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer System.out.println(" ## queryField: " + queryField); System.out.println(" ## valueField: " + valueField); - QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField); - QueryBuilder boolQuery = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery); + // I do not understand this, can possibly be deleted? + // String queryField = ""; + // String valueField = ""; + // + // if (setParts[0].equals(TGConstants.SET_FIELD_FOR_TEXTGRID)) { + // queryField = TGConstants.PROJECT_ID_FOR_TEXTGRID; + // valueField = setParts[1]; + // } - query = boolQuery; + QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField); + if (this.textgrid) { + query = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery).mustNot(filterSandBox); + } else { + query = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery); + } } else { - query = QueryBuilders.boolQuery().must(rangeQuery); + if (this.textgrid) { + query = QueryBuilders.boolQuery().must(rangeQuery).mustNot(filterSandBox); + } else { + query = QueryBuilders.boolQuery().must(rangeQuery); + } } System.out.println(" ## QUERY:\n" + query); @@ -172,8 +131,16 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer String set) { List<String> uriList = new ArrayList<String>(); - QueryBuilder recordFilter = QueryBuilders.boolQuery().must(query) - .must(QueryBuilders.matchPhraseQuery("format", this.formatToFilter)); + + QueryBuilder recordFilter; + if (this.textgrid) { + // We filter out all editions here! + recordFilter = QueryBuilders.boolQuery().must(query) + .must(QueryBuilders.matchPhraseQuery("format", this.formatToFilter)); + } else { + // Do not filter at all in DH. We need every ID! + recordFilter = QueryBuilders.boolQuery().must(query); + } SearchRequest searchRequest = new SearchRequest(OAI_ESClient.getEsIndex()); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); @@ -218,7 +185,17 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer for (SearchHit hit : scrollResp.getHits().getHits()) { i++; if (hit != null && hit.getFields() != null) { - uriList.add(hit.getSourceAsMap().get(TGConstants.URI).toString()); + String id2add; + // FIXME Could we not use hit.getId() also for TG hits? Where is the difference? + if (this.textgrid) { + id2add = hit.getSourceAsMap().get(TGConstants.URI).toString(); + } else { + id2add = hit.getId(); + } + + System.out.println(" ## id2add: " + id2add); + + uriList.add(id2add); } } if (resumptionToken != null @@ -362,9 +339,6 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer * @param searchResponseSize */ public void setSearchResponseSize(int searchResponseSize) { - - log.debug("SearchResponseSize: " + searchResponseSize); - this.searchResponseSize = searchResponseSize; } @@ -564,9 +538,6 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer * @param fields */ public void setFields(String[] fields) { - - System.out.println(" ## fields set: " + fields); - this.fields = fields; } diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java index 5281d742..d533adf7 100644 --- a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java +++ b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java @@ -48,7 +48,7 @@ public class RecordListDelivererDATACITE extends RecordListDelivererAbstract { System.out.println(" ## URI: " + uri); - // We must remove the prefix, as ElasticSearch is storing the IDa without it. + // We must remove the prefix, as ElasticSearch is storing the IDs without it. GetRecordType grt = openAireRecord.getRecordById(uri.replace("textgrid:", "")); openAireRecordList.getRecord().add(grt.getRecord()); } @@ -59,15 +59,16 @@ public class RecordListDelivererDATACITE extends RecordListDelivererAbstract { // ** else if (this.dariah) { - for (String uri : getUriListDARIAH(from, to, set, resumptionToken)) { + for (String uri : getUriList(from, to, set, resumptionToken)) { log.debug("uri: " + uri); System.out.println(" ## URI: " + uri); - // We must remove the prefix, as ElasticSearch is storing the IDa without it. - GetRecordType grt = - openAireRecord.getRecordById(uri.replace(RDFConstants.HDL_PREFIX, "")); + // We must remove the prefix, as ElasticSearch is storing the IDs without it. + // GetRecordType grt = + // openAireRecord.getRecordById(uri.replace(RDFConstants.HDL_PREFIX, "")); + GetRecordType grt = openAireRecord.getRecordById(uri); openAireRecordList.getRecord().add(grt.getRecord()); } } -- GitLab