diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..9df122cc5cae08f7d7207f09ebb792e91a941e09
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,30 @@
+# Convenience targets wrapping the Docker Compose commands for QuiVer Benchmarks.
+
+# None of these targets produces a file named after itself.
+.PHONY: build start prepare-default-gt run stop
+
+# Build the image(s) defined in docker-compose.yml.
+build:
+	docker compose build
+
+# Start the `app` service as a detached one-off container.
+start:
+	docker compose run -d app
+
+# Run scripts/prepare.sh inside the running container.
+prepare-default-gt:
+	docker compose exec app bash scripts/prepare.sh
+
+# Execute the workflows; stdout is written to a timestamped file in logs/.
+run:
+	mkdir -p logs
+	docker compose exec app bash workflows/execute_workflows.sh > logs/run_$$(date +"%s").log
+
+# Stop and remove every running container whose `docker ps` line mentions
+# "quiver". CONTAINER_ID is left unquoted on purpose: it may hold several IDs.
+stop:
+	CONTAINER_ID=$$(docker ps | grep quiver | cut -d' ' -f1); \
+	if [ -n "$$CONTAINER_ID" ]; then \
+		docker container stop $$CONTAINER_ID && docker container rm $$CONTAINER_ID; \
+	else \
+		echo "No running quiver container found."; \
+	fi
diff --git a/README.md b/README.md
index d065e1fbf57c89fccc0d880657625f9d7e0046a1..2ca28618192c1b458f7fe46cb1864c0572738736 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@ QuiVer Benchmarks is based on `ocrd/all:maximum` and has all OCR-D processors at
 
 - Docker >= 23.0.0
 - [Docker Compose plugin](https://docs.docker.com/compose/install/linux/#install-using-the-repository)
+- make
 
 To speed up QuiVer Benchmarks you can mount already downloaded text recognition models to `/usr/local/share/ocrd-resources/` in `docker-compose.yml` by adding
 
@@ -24,11 +25,12 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well
 
 - clone this repository and switch to the cloned directory
 - (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs
-- build the image with `docker compose build`
-- spin up a container with `docker compose run -d app`
-- run `docker compose exec app bash scripts/prepare.sh`
-- run `docker compose exec app bash workflows/execute_workflows.sh`
+- build the image with `make build`
+- spin up a container with `make start`
+- run `make prepare-default-gt`
+- run `make run`
 - the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system
+- when finished, run `make stop` to shut down and remove the Docker container you created previously
 
 ## Benchmarks Considered
 
diff --git a/docker-compose.yml b/docker-compose.yml
index b7ca20e496333ece954a5f7ccc6fdcf427698bc6..b1d715ca204e72bd056e6512ec3d786751b58260 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,6 +9,7 @@ services:
     volumes:
       - ./data:/app/data # this will write the results to your host system
       - ./gt:/app/gt
+      - ./workflows:/app/workflows
       # mount your modules, custom workflows and data here
       #- ./models/ocrd-tesserocr-recognize:/usr/local/share/tessdata/
       #- ./models/ocrd-calamari-recognize:/usr/local/share/ocrd-resources/
diff --git a/workflows/execute_workflows.sh b/workflows/execute_workflows.sh
index db8957db8dcbc40905f1667b984ee400f8b231b6..efe1a6f9eefa3bc7e1aeeab446db80a175db5eb1 100755
--- a/workflows/execute_workflows.sh
+++ b/workflows/execute_workflows.sh
@@ -39,7 +39,7 @@ convert_ocrd_wfs_to_NextFlow() {
 
 download_models() {
     echo "Download the necessary models if not available"
-    if [[ ! -d /usr/local/share/tessdata ]]
+    if [[ ! -f /usr/local/share/tessdata/Fraktur_GT4HistOCR.traineddata ]]
     then
         #mkdir -p /usr/local/share/ocrd-resources/
         ocrd resmgr download ocrd-tesserocr-recognize '*'
@@ -91,10 +91,12 @@ execute_wfs_and_extract_benchmarks() {
     # for all data sets…
     for WS_DIR in "$WORKSPACE_DIR"/*
     do
-        if [ -d "$WS_DIR" ]; then
+        # Test -d first so plain files are never inspected, and let the shell
+        # glob locate mets.xml instead of parsing `ls` output.
+        if [ -d "$WS_DIR" ] && ! grep -q "OCR-D-OCR" "$WS_DIR"/data/*/mets.xml ; then
             echo "Switching to $WS_DIR."
 
-            DIR_NAME=$(basename $WS_DIR)
+            DIR_NAME=$(basename "$WS_DIR")
 
             run "$WS_DIR"/data/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR"
             run "$WS_DIR"/data/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR"
@@ -155,7 +157,7 @@ save_workspaces() {
     # $2: $DIR_NAME
     # $3: $WORKFLOW
     echo "Zipping workspace $1"
-    ocrd zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
+    ocrd -l ERROR zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
     WORKFLOW_NAME=$(basename -s .txt.nf "$3")
     mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip
 }