diff --git a/README.md b/README.md index c602db55fd8a9c7ce3a814a5bb0aa2d48db46ae0..d111fbc4c8b5cbef6eda296c5dda90c80a8d1408 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,12 @@ Additionally it provides function to save data into the TextGrid-Repository Publizierfähige (Bild)-Daten und Metadaten werden von ConedaKor in das TextGrid Repository (OwnStorage) eingespielt, um dann per TextGrid OAI-PMH (MetadataFormat **oai_idiom_mets** aus Projekt „idiom“ <https://dev.textgridlab.org/1.0/tgoaipmh/oai?verb=ListRecords&metadataPrefix=oai_idiom_mets>) in die Visual Library der Uni Bonn übernehmen zu können. +(vorläufige) Beispiel-Links zur VL: + +* <https://maya.test.vls.io> +* <https://maya.test.vls.io/content/titleinfo/55267> + + #### Workflow * Bilder mit Metadaten werden in einer ConedaKOR-Instanz gehostet und publiziert <https://classicmayan.kor.de.dariah.eu>. @@ -43,7 +49,9 @@ New ready-to-go images from ConedaKor shall be published in the TextGrid Reposit * **update** updates all the files modified since the above date (gets 500 objects at a time, please repeat if you want to update more), * **importNew** only imports all new files since the aboce date (imports 500 new objects at a time, please repeat if you want to import more), and * **importAll** imports all the applicable data from ConedaCOR in a new collection with name “ConedaKorMediumMetadata“ (not really needed at the moment), including all the subcollections to be created (importa all the items, no need for repeating anything). + * **checkAll** looks up every ConedaKOR ID in TextGrid titles and/or notes tags, and outputs a list of KOR IDs missing in TextGrid. * Try starting the main class using **dryRun=true** to see, what would happen, start main class using **dryRun=false** to make it happen! 
+ * Test for newly imported objects in OAI-PMH (please set **from** and **to** values correctly): <https://textgridlab.org/1.0/tgoaipmh/oai?verb=ListRecords&metadataPrefix=oai_idiom_mets&from=2022-12-01&until=2022-12-06> ### Useful queries @@ -66,7 +74,7 @@ New ready-to-go images from ConedaKor shall be published in the TextGrid Reposit * <https://classicmayan.kor.de.dariah.eu/entities.json?kind_id=1&updated_after=2022-05-01&include=technical,dataset> * <https://classicmayan.kor.de.dariah.eu/entities.json?kind_id=1&created_after=&created_before=&per_page=&page=&include=technical,dataset> * <https://classicmayan.kor.de.dariah.eu/entities.json?kind_id=1&created_before=2022-03-18&updated_after=2022-03-18&include=technical,dataset> - + * <https://classicmayan.kor.de.dariah.eu/entities.json?kind_id=1&id=27662&include=technical,dataset> ## Releasing a new version diff --git a/src/main/java/org/classicmayan/tools/ConedaKor2TextGridRep.java b/src/main/java/org/classicmayan/tools/ConedaKor2TextGridRep.java index 472b5d1e28dcae7ecdfb955a1f5f76b59d780752..a55982d044fc47b05400810fd6ab3b171c7236ee 100644 --- a/src/main/java/org/classicmayan/tools/ConedaKor2TextGridRep.java +++ b/src/main/java/org/classicmayan/tools/ConedaKor2TextGridRep.java @@ -1,5 +1,5 @@ /** - * This software is copyright (c) 2022 by + * This software is copyright (c) 2023 by * * Göttingen State and University Library * @@ -18,6 +18,7 @@ package org.classicmayan.tools; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -101,6 +102,14 @@ public class ConedaKor2TextGridRep { main.importAll(); } + /* + * CHECK ALL KOR IDS IN TEXTGRIDLAB + */ + + else if (main.itgo.getScope().equals(IdiomConstants.SCOPE_CHECK_ALL)) { + main.checkAll(); + } + /* * UPDATE OF MODIFIED CONEDAKOR OBJECTS EXISTING IN TEXTGRIDREP */ @@ -265,6 +274,69 @@ public class ConedaKor2TextGridRep { + MEDIUM_METADATA_COLLECTION_NAME + "' 
and " + subcols.size() + " objects");
   }
 
+  /**
+   * <p>
+   * CHECKS ALL image object metadata from ConedaKOR if already existing in TextGridLab. Prints
+   * the KOR IDs missing in TextGrid, collected over all ConedaKOR pages, in numerical order.
+   * </p>
+   *
+   * @throws IOException
+   * @throws JSONException
+   * @throws InterruptedException
+   * @throws CrudClientException
+   */
+  public void checkAll()
+      throws JSONException, IOException, InterruptedException, CrudClientException {
+
+    System.out.println();
+    System.out.println("CHECK all ConedaKOR image objects in TG-rep");
+    System.out.println(LINES);
+
+    // Get list of all objects in ConedaKOR.
+    System.out.println("\tLooking for ALL images");
+    System.out.println("\tConedaKOR URL: " + ConedaKorQueries.KOR_URL);
+
+    // Check amount (a small first request is enough), and add a last partial page if existing.
+    int amount = ConedaKorQueries.getAllImageMetadata(10, 1).getInt("total");
+    int pages = amount / OBJECTS_PER_PAGE;
+    if (amount % OBJECTS_PER_PAGE > 0) {
+      pages++;
+    }
+
+    System.out.println();
+    System.out.println("\tFound " + amount + " entries delivered in " + pages + " pages!");
+
+    // Loop over ALL pages, counted from 1. The page size is kept constant, so every page number
+    // addresses the same offset, the last page just delivers fewer records.
+    // NOTE(review): assumes the offset is computed from page and page size, please confirm!
+    List<Integer> missingIDs = new ArrayList<Integer>();
+    for (int p = 1; p <= pages; p++) {
+      System.out.print("\tGetting next page " + p + " --> ");
+      JSONArray newMediaList =
+          ConedaKorQueries.getAllImageMetadata(OBJECTS_PER_PAGE, p).getJSONArray("records");
+      // Never index into an empty page.
+      if (newMediaList.length() == 0) {
+        System.out.println("no records");
+        continue;
+      }
+      System.out.print("ID:" + newMediaList.getJSONObject(0).getInt("id") + "-ID:");
+      System.out.println(newMediaList.getJSONObject(newMediaList.length() - 1).getInt("id"));
+      // Check TextGrid objects, KOR IDs are kept as numbers so that 9 sorts before 10.
+      for (String korID : checkForTextGridExistance(newMediaList, this.tgsearch)) {
+        missingIDs.add(Integer.valueOf(korID));
+      }
+    }
+    Collections.sort(missingIDs);
+
+    // Only report a first and last ID if anything is missing at all.
+    String range = "";
+    if (!missingIDs.isEmpty()) {
+      range = " (" + missingIDs.get(0) + "-" + missingIDs.get(missingIDs.size() - 1) + ")";
+    }
+    System.out.println(
+        "\t" + missingIDs.size() + " KOR IDs are missing in TextGrid" + range + ": " + missingIDs);
+  }
+
   /**
    * <p>
    * Get MODIFIED image metadata from ConedaKOR and update existing objects in TextGrid Repository.
@@ -563,6 +635,40 @@ public class ConedaKor2TextGridRep {
     return result;
   }
 
+  /**
+   * <p>
+   * Check for existence of the ConedaKOR objects in TextGridLab.
+   * </p>
+   *
+   * @param newMetadata The ConedaKOR records to check, each one is read for its integer "id".
+   * @param tgsearchClient The TG-search client used to look up every KOR ID.
+   * @return The KOR IDs for which the lookup threw a NotFoundException.
+   */
+  private static List<String> checkForTextGridExistance(JSONArray newMetadata,
+      TGSearchQueries tgsearchClient) {
+
+    List<String> result = new ArrayList<String>();
+
+    for (int i = 0; i < newMetadata.length(); i++) {
+      JSONObject jsonObjectMetadata = newMetadata.getJSONObject(i);
+      String korID = String.valueOf(jsonObjectMetadata.getInt("id"));
+
+      System.out.print("\t[" + (i + 1) + "] " + korID + " --> ");
+
+      // Check for KOR ID in TextGrid titles.
+      try {
+        String textgridURI = tgsearchClient.getTextGridURIfromKorID(korID);
+        System.out.println(textgridURI);
+      } catch (NotFoundException e) {
+        result.add(korID);
+
+        System.out.println("[ERROR] " + e.getMessage());
+      }
+    }
+
+    return result;
+  }
+
   /**
    * <p>
    * Create a new collection with resources from the given URI list and title.
diff --git a/src/main/java/org/classicmayan/tools/IdiomConstants.java b/src/main/java/org/classicmayan/tools/IdiomConstants.java index 0179802fab1c90bb62e4b4cc452b866b7357d20e..566fe513f1383b474e757196362c21379f8a24ed 100644 --- a/src/main/java/org/classicmayan/tools/IdiomConstants.java +++ b/src/main/java/org/classicmayan/tools/IdiomConstants.java @@ -35,6 +35,7 @@ public final class IdiomConstants { public static final String SCOPE_IMPORT_ALL = "importAll"; public static final String SCOPE_UPDATE = "update"; public static final String SCOPE_IMPORT_NEW = "importNew"; + public static final String SCOPE_CHECK_ALL = "checkAll"; public static final String DRY_RUN_PROPERTY = "dryRun"; public static final String SID_PROPERTY = "tg.sessionID"; public static final String PID_PROPERTY = "tg.projectID"; diff --git a/src/main/java/org/classicmayan/tools/TGSearchQueries.java b/src/main/java/org/classicmayan/tools/TGSearchQueries.java index 5f5577eb23f81b886c6f445741d1ab83a8961848..9abf69b65814e4d8fd7dea1407ff06185f679bc0 100644 --- a/src/main/java/org/classicmayan/tools/TGSearchQueries.java +++ b/src/main/java/org/classicmayan/tools/TGSearchQueries.java @@ -298,10 +298,10 @@ public class TGSearchQueries { if (notesURI.equals(titleURI)) { result = notesURI; } else if (notesURI.isEmpty()) { - System.out.print("[WARNING! No TextGrid URI in <title> for " + korID + "] "); + System.out.print("[WARNING! No KOR ID in <notes> for " + korID + "] "); result = titleURI; } else { - System.out.print("[WARNING! No TextGrid URI in <notes> for " + korID + "] "); + System.out.print("[WARNING! KOR ID in <notes> differs from KOR ID in title: " + korID + "] "); result = notesURI; }