From af8c3db2db19bb828c8014ee043d9e75d47afa72 Mon Sep 17 00:00:00 2001
From: "Stefan E. Funk" <funk@sub.uni-goettingen.de>
Date: Wed, 21 Jul 2021 18:25:40 +0200
Subject: [PATCH] IDs gotten from ES in DH, solving fields issue now!

---
 .../RecordListDelivererAbstract.java          | 111 +++++++-----------
 .../RecordListDelivererDATACITE.java          |  11 +-
 2 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java
index e35dcf6d..4219d23c 100644
--- a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java
+++ b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererAbstract.java
@@ -76,67 +76,11 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
 
     QueryBuilder query;
 
-    // FIXME Use values from config!
-
-    // FIXME Unite with method getUriListDARIAH()!
-
-    // FIXME Avoid useless logging!
-
-    System.out.println("RANGEFIELD: " + "created");
-
-    QueryBuilder rangeQuery = QueryBuilders.rangeQuery("created").from(from).to(to);
-    QueryBuilder filterSandBox = QueryBuilders.matchPhraseQuery("nearlyPublished", "true");
-
-    if (set != null && !set.equals("openaire_data")) {
-      String[] setParts = set.split(":");
-
-      String queryField = "";
-      String valueField = "";
-
-      if (setParts[0].equals(TGConstants.SET_FIELD_FOR_TEXTGRID)) {
-        queryField = TGConstants.PROJECT_ID_FOR_TEXTGRID;
-        valueField = setParts[1];
-      }
-
-      // FIXME Unite with query from class SetListDeliverer!
-
-      QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField);
-      QueryBuilder boolQuery =
-          QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery).mustNot(filterSandBox);
-
-      query = boolQuery;
-    } else {
-      // query = rangeQuery;
-      query = QueryBuilders.boolQuery().must(rangeQuery).mustNot(filterSandBox);
-    }
-
-    System.out.println("  ##  QUERY:\n" + query);
-
-    result = getFieldsFromESIndex(query, resumptionToken, set);
-
-    System.out.println("  ##  RESULT: " + result);
-
-    return result;
-  }
-
-  /**
-   * @param from
-   * @param to
-   * @param set
-   * @param resumptionToken
-   * @return
-   */
-  public List<String> getUriListDARIAH(String from, String to, String set, String resumptionToken) {
-
-    List<String> result = new ArrayList<String>();
-
-    QueryBuilder query;
-
     System.out.println("  ##  RANGEFIELD: " + this.dateOfObjectCreation);
 
     QueryBuilder rangeQuery = QueryBuilders.rangeQuery(this.dateOfObjectCreation).from(from).to(to);
+    QueryBuilder filterSandBox = QueryBuilders.matchPhraseQuery("nearlyPublished", "true");
 
-    // TODO We must set the set's range as in SetListDeliverer using query scripts!
     if (set != null && !set.equals("openaire_data")) {
       String[] setParts = set.split(":");
       String queryField = setParts[0];
@@ -145,12 +89,27 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
       System.out.println("  ##  queryField: " + queryField);
       System.out.println("  ##  valueField: " + valueField);
 
-      QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField);
-      QueryBuilder boolQuery = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery);
+      // FIXME I do not understand this block; can it possibly be deleted?
+      // String queryField = "";
+      // String valueField = "";
+      //
+      // if (setParts[0].equals(TGConstants.SET_FIELD_FOR_TEXTGRID)) {
+      // queryField = TGConstants.PROJECT_ID_FOR_TEXTGRID;
+      // valueField = setParts[1];
+      // }
 
-      query = boolQuery;
+      QueryBuilder matchQuery = QueryBuilders.matchPhraseQuery(queryField, valueField);
+      if (this.textgrid) {
+        query = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery).mustNot(filterSandBox);
+      } else {
+        query = QueryBuilders.boolQuery().must(rangeQuery).must(matchQuery);
+      }
     } else {
-      query = QueryBuilders.boolQuery().must(rangeQuery);
+      if (this.textgrid) {
+        query = QueryBuilders.boolQuery().must(rangeQuery).mustNot(filterSandBox);
+      } else {
+        query = QueryBuilders.boolQuery().must(rangeQuery);
+      }
     }
 
     System.out.println("  ##  QUERY:\n" + query);
@@ -172,8 +131,16 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
       String set) {
 
     List<String> uriList = new ArrayList<String>();
-    QueryBuilder recordFilter = QueryBuilders.boolQuery().must(query)
-        .must(QueryBuilders.matchPhraseQuery("format", this.formatToFilter));
+
+    QueryBuilder recordFilter;
+    if (this.textgrid) {
+      // We filter out all editions here!
+      recordFilter = QueryBuilders.boolQuery().must(query)
+          .must(QueryBuilders.matchPhraseQuery("format", this.formatToFilter));
+    } else {
+      // Do not filter at all in DH. We need every ID!
+      recordFilter = QueryBuilders.boolQuery().must(query);
+    }
 
     SearchRequest searchRequest = new SearchRequest(OAI_ESClient.getEsIndex());
     SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
@@ -218,7 +185,17 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
       for (SearchHit hit : scrollResp.getHits().getHits()) {
         i++;
         if (hit != null && hit.getFields() != null) {
-          uriList.add(hit.getSourceAsMap().get(TGConstants.URI).toString());
+          String id2add;
+          // FIXME Could we not use hit.getId() for TG hits, too? What is the difference?
+          if (this.textgrid) {
+            id2add = hit.getSourceAsMap().get(TGConstants.URI).toString();
+          } else {
+            id2add = hit.getId();
+          }
+
+          System.out.println("  ##  id2add: " + id2add);
+
+          uriList.add(id2add);
         }
       }
       if (resumptionToken != null
@@ -362,9 +339,6 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
    * @param searchResponseSize
    */
   public void setSearchResponseSize(int searchResponseSize) {
-
-    log.debug("SearchResponseSize: " + searchResponseSize);
-
     this.searchResponseSize = searchResponseSize;
   }
 
@@ -564,9 +538,6 @@ public abstract class RecordListDelivererAbstract implements RecordListDeliverer
    * @param fields
    */
   public void setFields(String[] fields) {
-
-    System.out.println("  ##  fields set: " + fields);
-
     this.fields = fields;
   }
 
diff --git a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java
index 5281d742..d533adf7 100644
--- a/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java
+++ b/oaipmh-core/src/main/java/info/textgrid/middleware/RecordListDelivererDATACITE.java
@@ -48,7 +48,7 @@ public class RecordListDelivererDATACITE extends RecordListDelivererAbstract {
 
           System.out.println("  ##  URI: " + uri);
 
-          // We must remove the prefix, as ElasticSearch is storing the IDa without it.
+          // We must remove the prefix, as ElasticSearch is storing the IDs without it.
           GetRecordType grt = openAireRecord.getRecordById(uri.replace("textgrid:", ""));
           openAireRecordList.getRecord().add(grt.getRecord());
         }
@@ -59,15 +59,16 @@ public class RecordListDelivererDATACITE extends RecordListDelivererAbstract {
       // **
 
       else if (this.dariah) {
-        for (String uri : getUriListDARIAH(from, to, set, resumptionToken)) {
+        for (String uri : getUriList(from, to, set, resumptionToken)) {
 
           log.debug("uri: " + uri);
 
           System.out.println("  ##  URI: " + uri);
 
-          // We must remove the prefix, as ElasticSearch is storing the IDa without it.
-          GetRecordType grt =
-              openAireRecord.getRecordById(uri.replace(RDFConstants.HDL_PREFIX, ""));
+          // The prefix is not removed anymore: the IDs are now taken directly from ElasticSearch.
+          // GetRecordType grt =
+          // openAireRecord.getRecordById(uri.replace(RDFConstants.HDL_PREFIX, ""));
+          GetRecordType grt = openAireRecord.getRecordById(uri);
           openAireRecordList.getRecord().add(grt.getRecord());
         }
       }
-- 
GitLab