Merge pull request #8 from OCR-D/hotfix

Hotfix

Merge pull request #8 from OCR-D/hotfix
da1ef015 · mweidling · GitHub · 3a2c7272 · 5835682b · da1ef015
Unverified Commit da1ef015 authored 1 year ago by mweidling Committed by GitHub 1 year ago
--- a/Makefile
+++ b/Makefile
+# Convenience wrappers around the Docker Compose commands used by this project.
+
+# None of these targets produces a file of the same name. Declaring them phony
+# keeps them working even if a file or directory called e.g. `build` exists.
+.PHONY: build start prepare-default-gt run stop
+
+# Build the image via Docker Compose.
+build:
+	docker compose build
+
+# Spin up the `app` service as a detached container.
+start:
+	docker compose run -d app
+
+# Execute scripts/prepare.sh inside the `app` container.
+prepare-default-gt:
+	docker compose exec app bash scripts/prepare.sh
+
+# Execute the workflows inside the `app` container and write their output to
+# logs/run_<seconds-since-epoch>.log on the host.
+run:
+	# The shell redirect below fails if the log directory is missing.
+	mkdir -p logs
+	docker compose exec app bash workflows/execute_workflows.sh > logs/run_$$(date +"%s").log
+
+# Stop and remove every running container whose `docker ps` line contains
+# "quiver". The container ID is the first column of the `docker ps` output.
+stop:
+	CONTAINER_ID=$$(docker ps | grep quiver | cut -d' ' -f1); \
+	if [ -n "$$CONTAINER_ID" ]; then \
+		docker container stop $$CONTAINER_ID && docker container remove $$CONTAINER_ID; \
+	else \
+		echo "No running quiver container found; nothing to stop."; \
+	fi
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ QuiVer Benchmarks is based on `ocrd/all:maximum` and has all OCR-D processors at

 - Docker >= 23.0.0
 - [Docker Compose plugin](https://docs.docker.com/compose/install/linux/#install-using-the-repository)
+- make

 To speed up QuiVer Benchmarks you can mount already downloaded text recognition models to `/usr/local/share/ocrd-resources/` in `docker-compose.yml` by adding

@@ -24,11 +25,12 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well

 - clone this repository and switch to the cloned directory
 - (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs
- build the image with `docker compose build`
- spin up a container with `docker compose run -d app`
- run `docker compose exec app bash scripts/prepare.sh`
- run `docker compose exec app bash workflows/execute_workflows.sh`
+- build the image with `make build`
+- spin up a container with `make start`
+- run `make prepare-default-gt`
+- run `make run`
 - the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system
+- when finished, run `make stop` to shut down and remove the Docker container you created previously

 ## Benchmarks Considered


--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,6 +9,7 @@ services:
    volumes:
      - ./data:/app/data # this will write the results to your host system
      - ./gt:/app/gt
+      - ./workflows:/app/workflows
      # mount your modules, custom workflows and data here
      #- ./models/ocrd-tesserocr-recognize:/usr/local/share/tessdata/
      #- ./models/ocrd-calamari-recognize:/usr/local/share/ocrd-resources/

--- a/workflows/execute_workflows.sh
+++ b/workflows/execute_workflows.sh
@@ -39,7 +39,7 @@ convert_ocrd_wfs_to_NextFlow() {

 download_models() {
    echo "Download the necessary models if not available"
-    if [[ ! -d /usr/local/share/tessdata ]]
+    if [[ ! -f /usr/local/share/tessdata/Fraktur_GT4HistOCR.traineddata ]]
    then
        #mkdir -p /usr/local/share/ocrd-resources/
        ocrd resmgr download ocrd-tesserocr-recognize '*'
@@ -91,10 +91,12 @@ execute_wfs_and_extract_benchmarks() {
    # for all data sets…
    for WS_DIR in "$WORKSPACE_DIR"/*
    do
-        if [ -d "$WS_DIR" ]; then
+        INNER_DIR=$(ls "$WS_DIR"/data/)
+
+        if [ -d "$WS_DIR" ] &&  ! grep -q "OCR-D-OCR" "$WS_DIR/data/$INNER_DIR/mets.xml" ; then
            echo "Switching to $WS_DIR."

-            DIR_NAME=$(basename $WS_DIR)
+            DIR_NAME=$(basename "$WS_DIR")

            run "$WS_DIR"/data/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR"
            run "$WS_DIR"/data/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR"
@@ -155,7 +157,7 @@ save_workspaces() {
    # $2: $DIR_NAME
    # $3: $WORKFLOW
    echo "Zipping workspace $1"
-    ocrd zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
+    ocrd -l ERROR zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
    WORKFLOW_NAME=$(basename -s .txt.nf "$3")
    mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip
 }