Skip to content
Snippets Groups Projects
Unverified Commit da1ef015 authored by mweidling's avatar mweidling Committed by GitHub
Browse files

Merge pull request #8 from OCR-D/hotfix

Hotfix
parents 3a2c7272 5835682b
No related branches found
No related tags found
1 merge request: !1 "Merge GitHub's state"
Makefile 0 → 100644
# Convenience wrappers around the Docker Compose commands used by this project.
# None of the targets produce a file of the same name, so declare them phony;
# otherwise a stray file or directory called e.g. `build` or `run` would make
# the corresponding target appear up to date and silently do nothing.
.PHONY: build start prepare-default-gt run stop

# Build the image(s) defined in the Compose file.
build:
	docker compose build

# Start the `app` service as a detached one-off container.
start:
	docker compose run -d app

# Run scripts/prepare.sh inside the running `app` container.
prepare-default-gt:
	docker compose exec app bash scripts/prepare.sh

# Run workflows/execute_workflows.sh inside the running `app` container and
# capture its stdout on the host in logs/run_<seconds-since-epoch>.log.
# The redirection happens on the host, so make sure logs/ exists first;
# without it the shell fails before the workflow is even started.
run:
	mkdir -p logs
	docker compose exec app bash workflows/execute_workflows.sh > logs/run_$$(date +"%s").log

# Stop and remove every running container whose `docker ps` line mentions
# "quiver". If nothing matches, report that and succeed instead of invoking
# `docker container stop` without arguments (which only prints a usage error).
# $$CONTAINER_ID is left unquoted on purpose so several IDs split into
# separate arguments.
stop:
	CONTAINER_ID=$$(docker ps | grep quiver | cut -d' ' -f1); \
	if [ -z "$$CONTAINER_ID" ]; then \
		echo "No running quiver container found." >&2; \
		exit 0; \
	fi; \
	docker container stop $$CONTAINER_ID && docker container remove $$CONTAINER_ID
\ No newline at end of file
......@@ -10,6 +10,7 @@ QuiVer Benchmarks is based on `ocrd/all:maximum` and has all OCR-D processors at
- Docker >= 23.0.0
- [Docker Compose plugin](https://docs.docker.com/compose/install/linux/#install-using-the-repository)
- make
To speed up QuiVer Benchmarks you can mount already downloaded text recognition models to `/usr/local/share/ocrd-resources/` in `docker-compose.yml` by adding
......@@ -24,11 +25,12 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well
- clone this repository and switch to the cloned directory
- (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs
- build the image with `docker compose build`
- spin up a container with `docker compose run -d app`
- run `docker compose exec app bash scripts/prepare.sh`
- run `docker compose exec app bash workflows/execute_workflows.sh`
- build the image with `make build`
- spin up a container with `make start`
- run `make prepare-default-gt`
- run `make run`
- the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system
- when finished, run `make stop` to shut down and remove the Docker container you created previously
## Benchmarks Considered
......
......@@ -9,6 +9,7 @@ services:
volumes:
- ./data:/app/data # this will write the results to your host system
- ./gt:/app/gt
- ./workflows:/app/workflows
# mount your models, custom workflows and data here
#- ./models/ocrd-tesserocr-recognize:/usr/local/share/tessdata/
#- ./models/ocrd-calamari-recognize:/usr/local/share/ocrd-resources/
......
......@@ -39,7 +39,7 @@ convert_ocrd_wfs_to_NextFlow() {
download_models() {
echo "Download the necessary models if not available"
if [[ ! -d /usr/local/share/tessdata ]]
if [[ ! -f /usr/local/share/tessdata/Fraktur_GT4HistOCR.traineddata ]]
then
#mkdir -p /usr/local/share/ocrd-resources/
ocrd resmgr download ocrd-tesserocr-recognize '*'
......@@ -91,10 +91,12 @@ execute_wfs_and_extract_benchmarks() {
# for all data sets…
for WS_DIR in "$WORKSPACE_DIR"/*
do
if [ -d "$WS_DIR" ]; then
INNER_DIR=$(ls "$WS_DIR"/data/)
if [ -d "$WS_DIR" ] && ! grep -q "OCR-D-OCR" "$WS_DIR/data/$INNER_DIR/mets.xml" ; then
echo "Switching to $WS_DIR."
DIR_NAME=$(basename $WS_DIR)
DIR_NAME=$(basename "$WS_DIR")
run "$WS_DIR"/data/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR"
run "$WS_DIR"/data/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR"
......@@ -155,7 +157,7 @@ save_workspaces() {
# $2: $DIR_NAME
# $3: $WORKFLOW
echo "Zipping workspace $1"
ocrd zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
ocrd -l ERROR zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME"
WORKFLOW_NAME=$(basename -s .txt.nf "$3")
mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment