From 3a2c7272762d481a7a6ec22265b5d6e7e96bdb8b Mon Sep 17 00:00:00 2001 From: mweidling <13831557+mweidling@users.noreply.github.com> Date: Wed, 19 Apr 2023 12:06:55 +0200 Subject: [PATCH] Add reichsanzeiger gt (#5) * feat: add Reichsanzeiger-GT to prepare.sh * add Reichsanzeiger subsets * add download for Reichsanzeiger subsets * add first draft for reichsanzeiger gt * extract sample bundles * update .gitignore * clean up * tidy up project root * tidy up volumes * update README * update data * build: roll back to version where cis runs still runs * build: remove default model mounting --- .gitignore | 3 +- Dockerfile | 7 +- README.md | 2 +- data/workflows.json | 8857 +++-------------- .../default_data_sources.txt | 0 data_srcs/reichsanzeiger_full.txt | 1 + data_srcs/reichsanzeiger_many_ads.list | 5 + data_srcs/reichsanzeiger_random.list | 6 + data_srcs/reichsanzeiger_tables.list | 5 + data_srcs/reichsanzeiger_title_pages.list | 5 + docker-compose.yml | 5 +- prepare.sh | 41 - scripts/convert-yml-to-json.py | 22 + scripts/prepare.sh | 98 + scripts/prepare_reichsanzeiger_sets.sh | 68 + src/benchmark_extraction.py | 18 +- workflows/execute_workflows.sh | 54 +- 17 files changed, 1542 insertions(+), 7655 deletions(-) rename default_data_sources.txt => data_srcs/default_data_sources.txt (100%) create mode 100644 data_srcs/reichsanzeiger_full.txt create mode 100644 data_srcs/reichsanzeiger_many_ads.list create mode 100644 data_srcs/reichsanzeiger_random.list create mode 100644 data_srcs/reichsanzeiger_tables.list create mode 100644 data_srcs/reichsanzeiger_title_pages.list delete mode 100644 prepare.sh create mode 100644 scripts/convert-yml-to-json.py create mode 100644 scripts/prepare.sh create mode 100644 scripts/prepare_reichsanzeiger_sets.sh diff --git a/.gitignore b/.gitignore index 77ec93a..5b5e2ef 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,8 @@ workflows/workspaces work/ workflows/nf-results/* workflows/results -workflows/ocrd-workflows/*.nf 
+workflows/ocrd_workflows/*.nf models .idea gt/* +build/ diff --git a/Dockerfile b/Dockerfile index ae906df..fe94011 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ocrd/all:maximum +FROM ocrd/all:2023-02-07 WORKDIR /app @@ -12,6 +12,8 @@ RUN apt-get install -y --fix-missing openjdk-11-jre COPY src src COPY setup.py setup.py COPY README.md README.md +COPY scripts scripts +COPY data_srcs data_srcs RUN git init RUN git submodule add https://github.com/MehmedGIT/OtoN_Converter submodules/oton @@ -22,9 +24,6 @@ RUN cd submodules/oton && \ sed -i "s \$projectDir/ocrd-workspace/ $WORKSPACE_DIR/CURRENT/ g" oton/config.toml && \ pip install . -COPY prepare.sh prepare.sh -COPY default_data_sources.txt default_data_sources.txt - RUN pip3 install -r requirements.txt RUN pip3 install . RUN nextflow diff --git a/README.md b/README.md index 98e95f0..d065e1f 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well - (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs - build the image with `docker compose build` - spin up a container with `docker compose run -d app` -- run `docker compose exec app bash prepare.sh` +- run `docker compose exec app bash scripts/prepare.sh` - run `docker compose exec app bash workflows/execute_workflows.sh` - the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system diff --git a/data/workflows.json b/data/workflows.json index b377f0b..bbc48ae 100644 --- a/data/workflows.json +++ b/data/workflows.json @@ -1,7 +1,7 @@ [ { - "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_minimal_ocr-eval", - "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_minimal_ocr", + "eval_workflow_id": "wf-datareichsanzeiger_tables-eval", + "label": "Workflow on data reichsanzeiger_tables", "metadata": { "ocr_workflow": { "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -12,16 +12,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_tables.ocrd.zip", + "label": "GT workspace reichsanzeiger_tables" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_ocr.zip", + "label": "OCR workspace for reichsanzeiger_tables" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_tables" }, "workflow_steps": [ { @@ -56,5863 +56,62 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 7.386463, - "cpu_time": 9.83532, - "cer_mean": 0.10240852523716282, - "cer_median": 0.10536980749746708, - "cer_range": [ - 0.07124352331606218, - 0.1306122448979592 - ], - "cer_standard_deviation": 0.02979493530847308, - "wer": 0.23466068901129858, - "pages_per_minute": 24.368902951250146 - }, - "by_page": [ - { - "page_id": 
"phys_0007", - "cer": 0.07124352331606218, - "wer": 0.2231404958677686 - }, - { - "page_id": "phys_0008", - "cer": 0.10536980749746708, - "wer": 0.2484472049689441 - }, - { - "page_id": "phys_0009", - "cer": 0.1306122448979592, - "wer": 0.2323943661971831 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - 
"rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - 
"publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 8.055356999999999, - "cpu_time": 12.105611, - "cer_mean": 0.0199501246882793, - "cer_median": 0.0199501246882793, - "cer_range": [ - 0.0199501246882793, - 0.0199501246882793 - ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 7.44845945375233 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.0199501246882793, - "wer": 0.09836065573770492 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_nn_besuch_1780_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - 
"level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 
76.547632, - "cpu_time": 113.81064699999999, - "cer_mean": 0.05959514724403742, - "cer_median": 0.0324919239726542, - "cer_range": [ - 0.008839779005524863, - 0.16455696202531644 - ], - "cer_standard_deviation": 0.07087591559854228, - "wer": 0.13238323035014285, - "pages_per_minute": 3.1353027354262246 - }, - "by_page": [ - { - "page_id": "phys_00001", - "cer": 0.16455696202531644, - "wer": 0.27586206896551724 - }, - { - "page_id": "phys_00002", - "cer": 0.008839779005524863, - "wer": 0.04046242774566474 - }, - { - "page_id": "phys_00003", - "cer": 0.030501089324618737, - "wer": 0.09826589595375723 - }, - { - "page_id": "phys_00004", - "cer": 0.034482758620689655, - "wer": 0.11494252873563218 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 
18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ 
- "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 154.933488, - "cpu_time": 215.310644, - "cer_mean": 0.12432674382712249, - "cer_median": 0.12040572654031183, - "cer_range": [ - 0.0718294051627385, - 0.18466611706512778 - ], - "cer_standard_deviation": 0.04665572742682036, - "wer": 0.18974770097390478, - "pages_per_minute": 1.5490518098966441 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.12757201646090535, - "wer": 0.14035087719298245 - }, - { - "page_id": "phys_0002", - "cer": 0.1132394366197183, - "wer": 0.1950354609929078 - }, - { - "page_id": "phys_0003", - "cer": 0.0718294051627385, - "wer": 0.15087719298245614 - }, - { - "page_id": "phys_0004", - "cer": 0.18466611706512778, - "wer": 0.2727272727272727 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 
18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - 
"overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 67.309474, - "cpu_time": 101.85330800000001, - "cer_mean": 0.16726583056278752, - "cer_median": 0.09637318392327315, - "cer_range": [ - 0.03187250996015936, - 0.4444444444444444 - ], - "cer_standard_deviation": 0.18889822286887584, - "wer": 0.28912998545359864, - "pages_per_minute": 3.565619900699269 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.4444444444444444, - "wer": 0.6204379562043796 - }, - { - "page_id": "phys_0002", - "cer": 0.03187250996015936, - "wer": 0.10919540229885058 - }, - { - "page_id": "phys_0003", - "cer": 0.1266778523489933, - "wer": 0.2681564245810056 - }, - { - "page_id": "phys_0004", - "cer": 0.06606851549755302, - "wer": 0.15873015873015872 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, 
- "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": 
"page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 122.82167999999999, - "cpu_time": 173.67148600000002, - "cer_mean": 0.05991629368071161, - "cer_median": 0.0496849827491332, - "cer_range": [ - 0.0019973368841544607, - 0.13829787234042554 - ], - "cer_standard_deviation": 0.06406814571829976, - "wer": 0.09724037283059882, - "pages_per_minute": 1.954052411593784 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.013584117032392894, - "wer": 0.06666666666666667 - }, - { - "page_id": "phys_0002", - "cer": 0.13829787234042554, - "wer": 0.1875 - }, - { - "page_id": "phys_0003", - "cer": 0.08578584846587352, - "wer": 0.12598425196850394 - }, - { - "page_id": "phys_0004", - "cer": 0.0019973368841544607, - "wer": 0.00881057268722467 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_luz_blitz_1784_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_luz_blitz_1784_selected_pages_ocr", - "metadata": { - 
"ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": 
"page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 145.735792, - "cpu_time": 208.46528999999998, - "cer_mean": 0.05256131079939043, - "cer_median": 0.029898228835687546, - "cer_range": [ - 0.009184845005740528, - 0.1412639405204461 - ], - "cer_standard_deviation": 0.06044359233653714, - "wer": 0.15936235092429332, - "pages_per_minute": 1.6468157664384877 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.1412639405204461, - "wer": 0.38461538461538464 - }, - { - "page_id": "phys_0002", - "cer": 0.039473684210526314, - "wer": 0.13043478260869565 - }, - { - "page_id": "phys_0003", - "cer": 0.020322773460848775, - "wer": 0.06792452830188679 - }, - { - "page_id": "phys_0004", - "cer": 0.009184845005740528, - "wer": 0.054474708171206226 - } - ] - } - }, - { - "eval_workflow_id": 
"wf-data18_frak_complex_luz_blitz_1784_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_luz_blitz_1784_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - 
"publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 18.435259, - "cpu_time": 21.704126, - "cer_mean": 0.030367084502750087, - "cer_median": 0.02381735856100435, - "cer_range": [ - 0.014354066985645933, - 0.05947955390334572 - ], - "cer_standard_deviation": 0.02030808283356641, - "wer": 0.08722583259487592, - "pages_per_minute": 13.018531499882915 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.05947955390334572, - "wer": 0.1794871794871795 - }, - { - "page_id": "phys_0002", - "cer": 0.014354066985645933, - "wer": 0.043478260869565216 - }, - { - "page_id": "phys_0003", - "cer": 0.02869097429766886, - "wer": 0.07924528301886792 - }, - { - "page_id": "phys_0004", - "cer": 0.01894374282433984, - "wer": 0.04669260700389105 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr-eval", - "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr" - }, - "eval_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - 
"overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 36.087631, - "cpu_time": 47.946267999999996, - "cer_mean": 0.5161379724470825, - "cer_median": 0.5161379724470825, - "cer_range": [ - 0.0754414125200642, - 0.9568345323741008 - ], - "cer_standard_deviation": 0.6232390519399567, - "wer": 0.6192780337941628, - "pages_per_minute": 3.3252390548994475 - }, - "by_page": [ - { - "page_id": "phys_0003", - "cer": 0.9568345323741008, - "wer": 0.9666666666666667 - }, - { - "page_id": "phys_0029", - "cer": 0.0754414125200642, - "wer": 0.271889400921659 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - 
"label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - 
"auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 3, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 34.033219, - "cpu_time": 53.635996999999996, - "cer_mean": 0.06688670097710152, - "cer_median": 0.03746177370030581, - "cer_range": [ - 0.0290519877675841, - 0.13414634146341464 - ], - "cer_standard_deviation": 0.058400133164399716, - "wer": 0.12383493220296966, - "pages_per_minute": 5.2889501871686 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.13414634146341464, - "wer": 0.22058823529411764 - }, - { - "page_id": "phys_0002", - "cer": 0.0290519877675841, - "wer": 0.05357142857142857 - }, - { - "page_id": "phys_0003", - "cer": 0.03746177370030581, - "wer": 0.09734513274336283 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr", - 
"metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": 
"ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 6, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 78.308453, - "cpu_time": 124.319609, - "cer_mean": 0.1500034071140843, - "cer_median": 0.12202020202020203, - "cer_range": [ - 0.03902862098872507, - 0.39473684210526316 - ], - "cer_standard_deviation": 0.12934283380557174, - "wer": 0.27891577144281926, - "pages_per_minute": 4.597204850924586 - }, - "by_page": [ - { - "page_id": 
"phys_0001", - "cer": 0.03902862098872507, - "wer": 0.0872093023255814 - }, - { - "page_id": "phys_0002", - "cer": 0.39473684210526316, - "wer": 0.6402439024390244 - }, - { - "page_id": "phys_0003", - "cer": 0.16756341275941583, - "wer": 0.3592233009708738 - }, - { - "page_id": "phys_0004", - "cer": 0.13737373737373737, - "wer": 0.2 - }, - { - "page_id": "phys_0005", - "cer": 0.10666666666666667, - "wer": 0.21893491124260356 - }, - { - "page_id": "phys_0006", - "cer": 0.05465116279069768, - "wer": 0.1678832116788321 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_minimal_ocr-eval", - "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - 
"overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 10.980085, - "cpu_time": 12.980711, - "cer_mean": 0.24855845660550213, - "cer_median": 0.24855845660550213, - "cer_range": [ - 0.07865168539325842, - 0.4184652278177458 - ], - "cer_standard_deviation": 0.2402844601873776, - "wer": 0.37300307219662054, - "pages_per_minute": 10.928877144393692 - }, - "by_page": [ - { - "page_id": "phys_0003", - "cer": 0.4184652278177458, - "wer": 0.48333333333333334 - }, - { - "page_id": "phys_0029", - "cer": 0.07865168539325842, - "wer": 0.2626728110599078 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_buerger_gedichte_1778_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": 
"GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } 
- }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 2, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 16.86402, - "cpu_time": 26.750532, - "cer_mean": 0.22786023044476886, - "cer_median": 0.22786023044476886, - "cer_range": [ - 0.053811659192825115, - 0.4019088016967126 - ], - "cer_standard_deviation": 0.24614184997615882, - "wer": 0.35125551082997897, - "pages_per_minute": 7.115741086644821 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.4019088016967126, - "wer": 0.5673758865248227 - }, - { - "page_id": "phys_0002", - "cer": 0.053811659192825115, - "wer": 0.13513513513513514 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr-eval", - "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow 
selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - 
"level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Black Letter" - ], - "publication_century": "1800-1900", - "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 45.820398, - "cpu_time": 66.91274899999999, - "cer_mean": 0.007554296506137866, - "cer_median": 0.007554296506137866, - "cer_range": [ - 0.007554296506137866, - 0.007554296506137866 - ], - "cer_standard_deviation": null, - "wer": 0.015873015873015872, - "pages_per_minute": 1.309460472167876 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.007554296506137866, - "wer": 0.015873015873015872 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_slower_processors_ocr-eval", - "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": 
"none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 32.539348, - "cpu_time": 51.522006000000005, - "cer_mean": 0.06721136853878373, - "cer_median": 0.055232558139534885, - "cer_range": [ - 0.03580246913580247, - 0.11059907834101383 - ], - "cer_standard_deviation": 0.038810463938030185, - "wer": 0.1847677033624047, - "pages_per_minute": 5.5317641890058775 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.11059907834101383, - "wer": 0.23300970873786409 - }, - { - "page_id": "phys_0013", - "cer": 0.055232558139534885, - "wer": 0.1941747572815534 - }, - { - "page_id": "phys_0014", - "cer": 0.03580246913580247, - "wer": 0.1271186440677966 - } - ] - } - 
}, - { - "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_slower_processors_ocr-eval", - "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - 
}, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 30.780167000000002, - "cpu_time": 52.275692, - "cer_mean": 0.10363204260868718, - "cer_median": 0.11836734693877551, - "cer_range": [ - 0.046632124352331605, - 0.1458966565349544 - ], 
- "cer_standard_deviation": 0.05124654849483992, - "wer": 0.2145458690579216, - "pages_per_minute": 5.8479214878853645 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.046632124352331605, - "wer": 0.1652892561983471 - }, - { - "page_id": "phys_0008", - "cer": 0.1458966565349544, - "wer": 0.2670807453416149 - }, - { - "page_id": "phys_0009", - "cer": 0.11836734693877551, - "wer": 0.2112676056338028 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_minimal_ocr-eval", - "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_minimal_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, 
- "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Black Letter" - ], - "publication_century": "1800-1900", - "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 4.437351, - "cpu_time": 5.324579, - "cer_mean": 0.004721435316336166, - "cer_median": 0.004721435316336166, - "cer_range": [ - 0.004721435316336166, - 0.004721435316336166 - ], - "cer_standard_deviation": null, - "wer": 0.015873015873015872, - "pages_per_minute": 13.521580780965943 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.004721435316336166, - "wer": 0.015873015873015872 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr_ocr.zip", - "label": "OCR workspace for 
18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 20.134204, - "cpu_time": 23.74436, - "cer_mean": 0.20433470587543373, - "cer_median": 0.1700173533179325, - "cer_range": [ - 0.04063745019920319, - 0.43666666666666665 - ], - "cer_standard_deviation": 0.19097881579976753, - "wer": 0.31449770246107817, - "pages_per_minute": 11.920014319910536 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.43666666666666665, - "wer": 0.583941605839416 - }, - { - "page_id": "phys_0002", - "cer": 0.04063745019920319, - "wer": 0.10344827586206896 - }, - { - "page_id": "phys_0003", - "cer": 0.05536912751677853, - "wer": 0.18435754189944134 - }, - { - "page_id": "phys_0004", - "cer": 
0.28466557911908646, - "wer": 0.3862433862433862 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_minimal_ocr-eval", - "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": 
{ - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 8.030865, - "cpu_time": 10.331791, - "cer_mean": 0.07452119312897007, - "cer_median": 0.0629800307219662, - "cer_range": [ - 0.037037037037037035, - 0.12354651162790697 - ], - "cer_standard_deviation": 0.044394494261965886, - "wer": 0.22683890077340793, - "pages_per_minute": 22.41352581571225 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.0629800307219662, - "wer": 0.21359223300970873 - }, - { - "page_id": "phys_0013", - "cer": 0.12354651162790697, - "wer": 0.33980582524271846 - }, - { - "page_id": "phys_0014", - "cer": 0.037037037037037035, - "wer": 0.1271186440677966 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr-eval", - "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", - "label": "GT workspace 18th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr" - }, - "eval_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - 
"overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 73.616766, - "cpu_time": 113.02993400000001, - "cer_mean": 0.02808165896942696, - "cer_median": 0.02821869488536155, - "cer_range": [ - 0.020618556701030927, - 0.03540772532188841 - ], - "cer_standard_deviation": 0.007395536576593408, - "wer": 0.13838629881265765, - "pages_per_minute": 2.445095183887866 - }, - "by_page": [ - { - "page_id": "phys_00003", - "cer": 0.020618556701030927, - "wer": 0.11392405063291139 - }, - { - "page_id": "phys_00005", - "cer": 0.02821869488536155, - "wer": 0.16115702479338842 - }, - { - "page_id": "phys_00010", - "cer": 0.03540772532188841, - "wer": 0.14007782101167315 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr-eval", - "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - 
"gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - 
"tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 34.966049000000005, - "cpu_time": 54.605062, - "cer_mean": 0.10034282802482036, - "cer_median": 0.10034282802482036, - "cer_range": [ - 0.09958847736625515, - 0.10109717868338558 - ], - "cer_standard_deviation": 0.001066812932128006, - "wer": 0.33029935275080907, - "pages_per_minute": 3.4319004700817066 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.09958847736625515, - "wer": 0.36893203883495146 - }, - { - "page_id": "phys_0021", - "cer": 0.10109717868338558, - "wer": 0.2916666666666667 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr", - "metadata": { - "ocr_workflow": { 
- "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 3, - "layout": "complex" - } - } - 
}, - "evaluation_results": { - "document_wide": { - "wall_time": 8.242131, - "cpu_time": 10.755872, - "cer_mean": 0.1790109644215708, - "cer_median": 0.05504587155963303, - "cer_range": [ - 0.008409785932721712, - 0.4735772357723577 - ], - "cer_standard_deviation": 0.2561653709691831, - "wer": 0.2391410103864555, - "pages_per_minute": 21.83901226515327 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.4735772357723577, - "wer": 0.5882352941176471 - }, - { - "page_id": "phys_0002", - "cer": 0.05504587155963303, - "wer": 0.09821428571428571 - }, - { - "page_id": "phys_0003", - "cer": 0.008409785932721712, - "wer": 0.030973451327433628 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr-eval", - "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", - "label": "GT workspace 19th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr" - }, - "workflow_steps": [ - { - 
"id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - 
"xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1800-1900", - "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 42.362522, - "cpu_time": 65.91597100000001, - "cer_mean": 0.08697690719872657, - "cer_median": 0.04421052631578947, - "cer_range": [ - 0.03735325506937033, - 0.17936694021101993 - ], - "cer_standard_deviation": 0.08008554296654442, - "wer": 0.2764482431149098, - "pages_per_minute": 4.249038808407111 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.04421052631578947, - "wer": 0.16666666666666666 - }, - { - "page_id": "phys_0002", - "cer": 0.03735325506937033, - "wer": 0.17037037037037037 - }, - { - "page_id": "phys_0003", - "cer": 0.17936694021101993, - "wer": 0.49230769230769234 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 
4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 55.553484, - "cpu_time": 89.664067, - "cer_mean": 0.06272817755704542, - "cer_median": 0.04686327354908531, - "cer_range": [ - 0.0039946737683089215, - 0.15319148936170213 - ], - "cer_standard_deviation": 0.07133216362479361, - "wer": 0.0881207064150021, - "pages_per_minute": 4.320161090166731 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.0073145245559038665, - "wer": 0.02857142857142857 - }, - { - "page_id": "phys_0002", - "cer": 0.15319148936170213, - "wer": 0.20833333333333334 - }, - { - "page_id": "phys_0003", - "cer": 0.08641202254226675, - "wer": 0.10236220472440945 - }, - { - "page_id": "phys_0004", - "cer": 0.0039946737683089215, - "wer": 0.013215859030837005 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - 
"@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": 
"ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 151.101655, - "cpu_time": 259.05950800000005, - "cer_mean": 0.140061110527823, - "cer_median": 0.06466848046246318, - "cer_range": [ - 0.028685258964143426, - 0.4022222222222222 - ], - "cer_standard_deviation": 0.17581800739756628, - "wer": 0.23651027895776314, - "pages_per_minute": 1.5883346876644073 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.4022222222222222, - "wer": 0.5474452554744526 - }, - { - "page_id": "phys_0002", - "cer": 0.028685258964143426, - "wer": 0.09195402298850575 - }, - { - "page_id": "phys_0003", - "cer": 0.07550335570469799, - "wer": 0.19553072625698323 - }, - { - "page_id": "phys_0004", - "cer": 0.053833605220228384, - "wer": 0.1111111111111111 - } - ] 
- } - }, - { - "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr-eval", - "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - 
"document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 1.751117, - "cpu_time": 2.32017, - "cer_mean": 0.02493765586034913, - "cer_median": 0.02493765586034913, - "cer_range": [ - 0.02493765586034913, - 0.02493765586034913 - ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 34.263844163468235 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.02493765586034913, - "wer": 0.09836065573770492 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr-eval", - "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 
16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": 
"", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 28.966472, - "cpu_time": 40.91749, - "cer_mean": 0.07499762380001901, - "cer_median": 0.07499762380001901, - "cer_range": [ - 0.0658682634730539, - 0.08412698412698413 - ], - "cer_standard_deviation": 0.012910865190184943, - "wer": 0.15174388339406558, - "pages_per_minute": 4.14272059089557 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.0658682634730539, - "wer": 0.18584070796460178 - }, - { - "page_id": "phys_0002", - "cer": 0.08412698412698413, - "wer": 0.11764705882352941 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr-eval", - "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - 
"max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 108.32675099999999, - "cpu_time": 136.94418900000002, - "cer_mean": 0.07708867978329056, - "cer_median": 0.07708867978329056, - "cer_range": [ - 0.05238095238095238, - 0.10179640718562874 - ], - "cer_standard_deviation": 0.034942003187804015, - "wer": 0.2288738504251258, - "pages_per_minute": 1.1077596151665254 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.10179640718562874, - "wer": 0.3008849557522124 - }, - { - "page_id": "phys_0002", - "cer": 0.05238095238095238, - "wer": 0.1568627450980392 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" - }, - 
"ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - 
"params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 23.314418, - "cpu_time": 34.297031000000004, - "cer_mean": 0.02493765586034913, - "cer_median": 0.02493765586034913, - "cer_range": [ - 0.02493765586034913, - 0.02493765586034913 - ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 2.5735148095912153 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.02493765586034913, - "wer": 0.09836065573770492 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr-eval", - "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - 
"max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 106.636709, - "cpu_time": 141.711402, - "cer_mean": 0.23276787879487743, - "cer_median": 0.23276787879487743, - "cer_range": [ - 0.07704654895666131, - 0.38848920863309355 - ], - "cer_standard_deviation": 0.22022321660797936, - "wer": 0.33520737327188943, - "pages_per_minute": 1.125316048528842 - }, - "by_page": [ - { - "page_id": "phys_0003", - "cer": 0.38848920863309355, - "wer": 0.44 - }, - { - "page_id": "phys_0029", - "cer": 0.07704654895666131, - "wer": 0.2304147465437788 - } - ] - } - }, - { - "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr-eval", - "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1600-1700", - "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 7.886743, - "cpu_time": 10.460914, - "cer_mean": 0.08427970798416445, - "cer_median": 0.09331797235023041, - "cer_range": [ - 0.05025996533795494, - 0.10926118626430802 - ], - "cer_standard_deviation": 0.030521364398782277, - "wer": 0.1842438386542771, - "pages_per_minute": 22.82310961571843 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.10926118626430802, - "wer": 0.2229299363057325 - }, - { - "page_id": "phys_0002", - "cer": 0.09331797235023041, - "wer": 
0.20689655172413793 - }, - { - "page_id": "phys_0003", - "cer": 0.05025996533795494, - "wer": 0.12290502793296089 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_minimal_ocr-eval", - "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", - "label": "GT workspace 19th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_minimal_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": 
"Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1800-1900", - "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 12.28958, - "cpu_time": 14.798878, - "cer_mean": 0.08328200324172261, - "cer_median": 0.08736842105263158, - "cer_range": [ - 0.04055496264674493, - 0.12192262602579132 - ], - "cer_standard_deviation": 0.04083746158658049, - "wer": 0.23519468186134854, - "pages_per_minute": 14.646554235376634 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.08736842105263158, - "wer": 0.22666666666666666 - }, - { - "page_id": "phys_0002", - "cer": 0.04055496264674493, - "wer": 0.14814814814814814 - }, - { - "page_id": "phys_0003", - "cer": 0.12192262602579132, - "wer": 0.33076923076923076 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_nn_besuch_1780_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_minimal_ocr" - }, - "eval_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 13.367254, - "cpu_time": 16.145112, - "cer_mean": 0.06315407734401027, - "cer_median": 0.026726016076928857, - "cer_range": [ - 0.01878453038674033, - 0.18037974683544303 - ], - "cer_standard_deviation": 0.07825196427362012, - "wer": 0.11662846322503488, - "pages_per_minute": 17.95432330379897 - }, - "by_page": [ - { - "page_id": "phys_00001", - "cer": 0.18037974683544303, - "wer": 0.1896551724137931 - }, - { - "page_id": "phys_00002", - "cer": 0.01878453038674033, - "wer": 0.08670520231213873 - }, - { - "page_id": "phys_00003", - "cer": 0.02505446623093682, - "wer": 0.08670520231213873 - }, - { - "page_id": "phys_00004", - "cer": 0.028397565922920892, - "wer": 0.10344827586206896 - } - ] - } - }, - { - "eval_workflow_id": 
"wf-data16_frak_simple_kistler_kraeuter_1500_minimal_ocr-eval", - "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - 
"publication_century": "1500-1600", - "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 8.741153, - "cpu_time": 10.631666, - "cer_mean": 0.11307713146793608, - "cer_median": 0.11307713146793608, - "cer_range": [ - 0.09135802469135802, - 0.13479623824451412 - ], - "cer_standard_deviation": 0.03071545536606607, - "wer": 0.35497572815533984, - "pages_per_minute": 13.728166066879277 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.09135802469135802, - "wer": 0.33495145631067963 - }, - { - "page_id": "phys_0021", - "cer": 0.13479623824451412, - "wer": 0.375 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr-eval", - "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr" - }, - 
"workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1500-1600", - 
"publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 81.76006000000001, - "cpu_time": 115.33805, - "cer_mean": 0.05713778913012629, - "cer_median": 0.05713778913012629, - "cer_range": [ - 0.05172413793103448, - 0.06255144032921811 - ], - "cer_standard_deviation": 0.0076560589477130125, - "wer": 0.24251618122977348, - "pages_per_minute": 1.4677092947338832 - }, - "by_page": [ - { - "page_id": "phys_0007", - "cer": 0.06255144032921811, - "wer": 0.2766990291262136 - }, - { - "page_id": "phys_0021", - "cer": 0.05172413793103448, - "wer": 0.20833333333333334 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace 
for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-tesserocr-recognize", - "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - 
"char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 63.120688, - "cpu_time": 99.46367699999999, - "cer_mean": 0.2886254685112176, - "cer_median": 0.2965426192770805, - "cer_range": [ - 0.07856341189674523, - 0.4828532235939643 - ], - "cer_standard_deviation": 0.204671750373206, - "wer": 0.4238352166683633, - "pages_per_minute": 3.802239924888018 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.4828532235939643, - "wer": 0.5614035087719298 - }, - { - "page_id": "phys_0002", - "cer": 0.14873239436619717, - "wer": 0.2695035460992908 - }, - { - "page_id": "phys_0003", - "cer": 0.07856341189674523, - "wer": 0.22807017543859648 - }, - { - "page_id": "phys_0004", - "cer": 0.4443528441879637, - "wer": 0.6363636363636364 - } - ] - } - }, - { - "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - 
"label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, 
- "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 3, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 81.051665, - "cpu_time": 123.14032399999999, - "cer_mean": 0.08296660948260859, - "cer_median": 0.03058103975535168, - "cer_range": [ - 0.02522935779816514, - 0.19308943089430894 - ], - "cer_standard_deviation": 0.09540669248903694, - "wer": 0.16153231203986018, - "pages_per_minute": 2.220805704608289 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.19308943089430894, - "wer": 0.38235294117647056 - }, - { - "page_id": "phys_0002", - "cer": 0.03058103975535168, - "wer": 0.05357142857142857 - }, - { - "page_id": "phys_0003", - "cer": 0.02522935779816514, - "wer": 0.048672566371681415 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_selected_pages_ocr-eval", - "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow 
dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - 
"overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" + "Antiqua" ], - "publication_century": "1500-1600", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "complex" + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 69.338131, - "cpu_time": 122.13893500000002, - "cer_mean": 0.11095656062741122, - "cer_median": 0.12244897959183673, + "wall_time": 530.564159, + "cpu_time": 724.869668, + "cer_mean": 1.479972651687655, + "cer_median": 1.331401349741961, "cer_range": [ - 0.07700101317122594, - 0.133419689119171 + 1.090097148229395, + 1.9778823058446757 ], - "cer_standard_deviation": 0.02991360090611332, - "wer": 0.26341315349276156, - "pages_per_minute": 2.595974212226747 + "cer_standard_deviation": 0.3854879135899152, + "wer": 1.7910299442503619, + "pages_per_minute": 0.5654358571175178 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.133419689119171, - "wer": 0.36363636363636365 - }, - { - "page_id": "phys_0008", - "cer": 0.07700101317122594, - "wer": 0.18012422360248448 - }, - { - "page_id": "phys_0009", - "cer": 0.12244897959183673, - "wer": 0.24647887323943662 - } - ] - } - }, - { - "eval_workflow_id": 
"wf-data18_frak_complex_buerger_gedichte_1778_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - 
"id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } + "page_id": "P_1877_7_0059", + "cer": 1.090097148229395, + "wer": 1.4218399401645474 }, { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } + "page_id": "P_1883_55_0044", + "cer": 1.2073319135990264, + "wer": 1.4705882352941178 }, { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } + "page_id": "P_1929_250_0019", + "cer": 1.331401349741961, + "wer": 1.7407221664994985 }, { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 - } - } - ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 2, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 46.097406, - "cpu_time": 67.55636299999999, - "cer_mean": 0.15032645549695894, - "cer_median": 0.15032645549695894, - "cer_range": [ - 0.04932735426008968, - 0.2513255567338282 - ], - "cer_standard_deviation": 0.14283429875667375, - "wer": 0.3456967605903776, - "pages_per_minute": 2.60318335482912 - }, - 
"by_page": [ - { - "page_id": "phys_0001", - "cer": 0.2513255567338282, - "wer": 0.475177304964539 + "page_id": "P_1932_300_0488", + "cer": 1.9778823058446757, + "wer": 2.349493487698987 }, { - "page_id": "phys_0002", - "cer": 0.04932735426008968, - "wer": 0.21621621621621623 + "page_id": "P_1936_123_0292", + "cer": 1.7931505410232167, + "wer": 1.9725058915946583 } ] } }, { - "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_minimal_ocr-eval", - "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr", + "eval_workflow_id": "wf-datasilesius_seelenlust01_1657-eval", + "label": "Workflow on data silesius_seelenlust01_1657", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -5923,16 +122,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/silesius_seelenlust01_1657.ocrd.zip", + "label": "GT workspace silesius_seelenlust01_1657" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_ocr.zip", + "label": "OCR workspace for silesius_seelenlust01_1657" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_evaluation.zip", + 
"label": "Evaluation workspace for silesius_seelenlust01_1657" }, "workflow_steps": [ { @@ -5968,385 +167,279 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1500-1600", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.257076, - "cpu_time": 8.706393, - "cer_mean": 0.043071000855431994, - "cer_median": 0.043071000855431994, + "wall_time": 13.583684, + "cpu_time": 17.039027, + "cer_mean": 0.3007055286105995, + "cer_median": 0.2951219512195122, "cer_range": [ - 0.014285714285714285, - 0.0718562874251497 + 0.19271623672230653, + 0.44970414201183434 ], - "cer_standard_deviation": 0.04070854266369089, - "wer": 0.10714905431199029, - "pages_per_minute": 16.53558540657422 + "cer_standard_deviation": 0.10657123719012947, + "wer": 0.5174305966287508, + "pages_per_minute": 22.08531941703002 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.0718562874251497, - "wer": 0.19469026548672566 + "cer": 0.44970414201183434, + "wer": 0.7101449275362319 }, { "page_id": "phys_0002", - "cer": 0.014285714285714285, - "wer": 0.0196078431372549 + "cer": 0.20913884007029876, + "wer": 0.5104166666666666 + }, + { + "page_id": "phys_0003", + "cer": 0.2951219512195122, + "wer": 0.4647887323943662 + }, + { + "page_id": "phys_0004", + "cer": 0.19271623672230653, + "wer": 0.3870967741935484 + }, + { + "page_id": "phys_0005", + "cer": 0.35684647302904565, + "wer": 0.5147058823529411 } ] } }, { - "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr-eval", - "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr", + "eval_workflow_id": "wf-databenner_herrnhuterey04_1748-eval", + "label": "Workflow on data benner_herrnhuterey04_1748", 
"metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", - "label": "GT workspace 18th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/benner_herrnhuterey04_1748.ocrd.zip", + "label": "GT workspace benner_herrnhuterey04_1748" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_ocr.zip", + "label": "OCR workspace for benner_herrnhuterey04_1748" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_evaluation.zip", + "label": "Evaluation workspace for benner_herrnhuterey04_1748" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - 
"dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - 
"workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 3, - "layout": "simple" + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 189.99469000000002, - "cpu_time": 255.53890399999997, - "cer_mean": 0.03947936370768366, - "cer_median": 0.0376249265138154, + "wall_time": 19.971749, + "cpu_time": 23.245253, + "cer_mean": 0.20433470587543373, + "cer_median": 0.1700173533179325, "cer_range": [ - 0.022336769759450172, - 0.05847639484978541 + 0.04063745019920319, + 0.43666666666666665 ], - "cer_standard_deviation": 0.01814103989293333, - "wer": 0.15140780173936938, - "pages_per_minute": 0.947394898246893 + "cer_standard_deviation": 0.19097881579976753, + "wer": 0.31449770246107817, + "pages_per_minute": 12.016974577439363 }, "by_page": [ { - "page_id": "phys_00003", - "cer": 0.022336769759450172, - "wer": 0.10970464135021098 + "page_id": "phys_0001", + "cer": 0.43666666666666665, + "wer": 0.583941605839416 }, { - "page_id": "phys_00005", - "cer": 0.0376249265138154, - "wer": 0.16942148760330578 + "page_id": "phys_0002", + "cer": 0.04063745019920319, + "wer": 0.10344827586206896 }, { - "page_id": "phys_00010", - "cer": 0.05847639484978541, - "wer": 0.17509727626459143 + "page_id": "phys_0003", + "cer": 0.05536912751677853, + "wer": 0.18435754189944134 + }, + { + "page_id": "phys_0004", + "cer": 0.28466557911908646, + "wer": 0.3862433862433862 } ] } }, { - "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr-eval", - "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr", + 
"eval_workflow_id": "wf-databohse_helicon_1696-eval", + "label": "Workflow on data bohse_helicon_1696", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 133, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - 
"params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bohse_helicon_1696.ocrd.zip", + "label": "GT workspace bohse_helicon_1696" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_ocr.zip", + "label": "OCR workspace for bohse_helicon_1696" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_evaluation.zip", + "label": "Evaluation workspace for bohse_helicon_1696" + }, + "workflow_steps": [ { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + 
"padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1600-1700", "publication_decade": "", "publication_year": "17th century", - "number_of_pages": 3, + "number_of_pages": 5, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 88.38021099999999, - "cpu_time": 127.13157100000001, - "cer_mean": 0.047703401203510305, - "cer_median": 0.04723502304147465, + "wall_time": 16.040643, + "cpu_time": 19.265802, + "cer_mean": 0.40403175304113403, + "cer_median": 0.45517241379310347, "cer_range": [ - 0.04592720970537262, - 0.04994797086368366 + 0.1955040871934605, + 0.5079365079365079 ], - "cer_standard_deviation": 0.002050893378518156, - "wer": 0.17277993391305632, - "pages_per_minute": 2.0366550154536296 + "cer_standard_deviation": 0.12207840266111031, + "wer": 0.5356334009868995, + "pages_per_minute": 18.70249216318822 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.04994797086368366, - "wer": 0.21019108280254778 + "cer": 0.5079365079365079, + "wer": 0.6382978723404256 }, { "page_id": "phys_0002", - "cer": 0.04723502304147465, - "wer": 0.15172413793103448 + "cer": 0.40540540540540543, + "wer": 0.5740740740740741 }, { "page_id": "phys_0003", - "cer": 0.04592720970537262, - "wer": 0.1564245810055866 + "cer": 0.45517241379310347, + "wer": 0.5467625899280576 + }, + { + 
"page_id": "phys_0004", + "cer": 0.45614035087719296, + "wer": 0.6018518518518519 + }, + { + "page_id": "phys_0005", + "cer": 0.1955040871934605, + "wer": 0.31718061674008813 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_buerger_gedichte_1778_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_minimal_ocr", + "eval_workflow_id": "wf-databuerger_gedichte_1778-eval", + "label": "Workflow on data buerger_gedichte_1778", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -6357,16 +450,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/buerger_gedichte_1778.ocrd.zip", + "label": "GT workspace buerger_gedichte_1778" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_ocr.zip", + "label": "OCR workspace for buerger_gedichte_1778" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_evaluation.zip", + "label": "Evaluation workspace for buerger_gedichte_1778" }, "workflow_steps": [ { @@ -6402,7 +495,7 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1700-1800", 
"publication_decade": "", @@ -6414,8 +507,8 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 6.016163, - "cpu_time": 7.291227, + "wall_time": 5.921714, + "cpu_time": 7.016172, "cer_mean": 0.10672693293515115, "cer_median": 0.10672693293515115, "cer_range": [ @@ -6424,7 +517,7 @@ ], "cer_standard_deviation": 0.08751702749046443, "wer": 0.23135901859306116, - "pages_per_minute": 19.94626807817541 + "pages_per_minute": 20.26440317786371 }, "by_page": [ { @@ -6441,8 +534,8 @@ } }, { - "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_minimal_ocr-eval", - "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr", + "eval_workflow_id": "wf-datarollenhagen_reysen_1603-eval", + "label": "Workflow on data rollenhagen_reysen_1603", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -6453,16 +546,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", - "label": "GT workspace 18th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/rollenhagen_reysen_1603.ocrd.zip", + "label": "GT workspace rollenhagen_reysen_1603" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_ocr.zip", + "label": "OCR workspace for rollenhagen_reysen_1603" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr" + "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_evaluation.zip", + "label": "Evaluation workspace for rollenhagen_reysen_1603" }, "workflow_steps": [ { @@ -6498,11 +591,11 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1700-1800", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "18th century", + "publication_year": "17th century", "number_of_pages": 3, "layout": "simple" } @@ -6510,40 +603,40 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 24.13474, - "cpu_time": 27.011507, - "cer_mean": 0.06438361522903834, - "cer_median": 0.03969957081545064, + "wall_time": 16.777821, + "cpu_time": 19.16547, + "cer_mean": 0.21046220070684576, + "cer_median": 0.16929133858267717, "cer_range": [ - 0.021764032073310423, - 0.13168724279835392 + 0.14512471655328799, + 0.3169705469845722 ], - "cer_standard_deviation": 0.058972490200809365, - "wer": 0.16906902212925057, - "pages_per_minute": 7.458128821773095 + "cer_standard_deviation": 0.093027024434784, + "wer": 0.3190752126565147, + "pages_per_minute": 10.728449183001775 }, "by_page": [ { - "page_id": "phys_00003", - "cer": 0.021764032073310423, - "wer": 0.12236286919831224 + "page_id": "phys_0001", + "cer": 0.3169705469845722, + "wer": 0.4649122807017544 }, { - "page_id": "phys_00005", - "cer": 0.13168724279835392, - "wer": 0.2603305785123967 + "page_id": "phys_0002", + "cer": 0.16929133858267717, + "wer": 0.28104575163398693 }, { - "page_id": "phys_00010", - "cer": 0.03969957081545064, - "wer": 0.1245136186770428 + "page_id": "phys_0003", + "cer": 0.14512471655328799, + "wer": 0.2112676056338028 } ] } }, { - "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_minimal_ocr-eval", - "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_minimal_ocr", + "eval_workflow_id": "wf-dataeuler_rechenkunst01_1738-eval", + "label": "Workflow on data 
euler_rechenkunst01_1738", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -6554,16 +647,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/euler_rechenkunst01_1738.ocrd.zip", + "label": "GT workspace euler_rechenkunst01_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_minimal_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_ocr.zip", + "label": "OCR workspace for euler_rechenkunst01_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_complex_huebner_handbuch_1696_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_evaluation.zip", + "label": "Evaluation workspace for euler_rechenkunst01_1738" }, "workflow_steps": [ { @@ -6599,134 +692,200 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, + "publication_year": "18th century", + "number_of_pages": 6, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 16.954402, - "cpu_time": 19.205537, - "cer_mean": 0.20627969569747484, - "cer_median": 0.08702290076335878, + "wall_time": 18.514995, + "cpu_time": 23.577633, + 
"cer_mean": 0.25835849983393794, + "cer_median": 0.230420483908856, "cer_range": [ - 0.08487084870848709, - 0.44694533762057875 + 0.08586296617519515, + 0.5747368421052632 ], - "cer_standard_deviation": 0.20842533731220508, - "wer": 0.3271033769383102, - "pages_per_minute": 10.616711813250621 + "cer_standard_deviation": 0.16915619139008603, + "wer": 0.37827288927088376, + "pages_per_minute": 19.443699552713895 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.44694533762057875, - "wer": 0.6341463414634146 + "cer": 0.08586296617519515, + "wer": 0.10465116279069768 }, { "page_id": "phys_0002", - "cer": 0.08702290076335878, - "wer": 0.1504424778761062 + "cer": 0.5747368421052632, + "wer": 0.7987804878048781 }, { "page_id": "phys_0003", - "cer": 0.08487084870848709, - "wer": 0.19672131147540983 + "cer": 0.2767102229054573, + "wer": 0.3786407766990291 + }, + { + "page_id": "phys_0004", + "cer": 0.22828282828282828, + "wer": 0.27741935483870966 + }, + { + "page_id": "phys_0005", + "cer": 0.152, + "wer": 0.27218934911242604 + }, + { + "page_id": "phys_0006", + "cer": 0.23255813953488372, + "wer": 0.43795620437956206 } ] } }, { - "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr-eval", - "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr", + "eval_workflow_id": "wf-datakistler_kraeuter_1500-eval", + "label": "Workflow on data kistler_kraeuter_1500", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/kistler_kraeuter_1500.ocrd.zip", + "label": "GT workspace kistler_kraeuter_1500" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_ocr.zip", + "label": "OCR workspace for kistler_kraeuter_1500" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_evaluation.zip", + "label": "Evaluation workspace for kistler_kraeuter_1500" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": 
false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } - }, + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.833456, + "cpu_time": 9.538595, + "cer_mean": 0.1809783660358373, + "cer_median": 0.1809783660358373, + "cer_range": [ + 0.13479623824451412, + 0.2271604938271605 + ], + "cer_standard_deviation": 0.06531139146173669, + "wer": 0.408373786407767, + "pages_per_minute": 15.318909048573197 + }, + "by_page": [ { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } + "page_id": "phys_0007", + "cer": 0.2271604938271605, + "wer": 0.441747572815534 }, { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, + "page_id": "phys_0021", + "cer": 0.13479623824451412, + "wer": 0.375 + } + ] + } + }, + { + "eval_workflow_id": "wf-datacalvi_beutelschneider01_1627-eval", + "label": "Workflow on data calvi_beutelschneider01_1627", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": 
"https://github.com/OCR-D/quiver-data/blob/main/calvi_beutelschneider01_1627.ocrd.zip", + "label": "GT workspace calvi_beutelschneider01_1627" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_ocr.zip", + "label": "OCR workspace for calvi_beutelschneider01_1627" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_evaluation.zip", + "label": "Evaluation workspace for calvi_beutelschneider01_1627" + }, + "workflow_steps": [ { - "id": "ocrd-tesserocr-segment", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "padding": 4, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, "raw_lines": false, "char_whitelist": "", "char_blacklist": "", @@ -6737,29 +896,95 @@ "auto_model": false, "oem": "DEFAULT" } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.37275, + "cpu_time": 9.77562, + "cer_mean": 0.08254540344028655, + "cer_median": 0.09331797235023041, + "cer_range": [ + 0.05025996533795494, + 0.1040582726326743 + ], + "cer_standard_deviation": 0.02847104928930229, + "wer": 0.1821206964037463, + "pages_per_minute": 24.41422806958055 + }, + "by_page": [ + { + "page_id": 
"phys_0001", + "cer": 0.1040582726326743, + "wer": 0.21656050955414013 }, { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } + "page_id": "phys_0002", + "cer": 0.09331797235023041, + "wer": 0.20689655172413793 }, + { + "page_id": "phys_0003", + "cer": 0.05025996533795494, + "wer": 0.12290502793296089 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataarnimb_goethe03_1835-eval", + "label": "Workflow on data arnimb_goethe03_1835", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/arnimb_goethe03_1835.ocrd.zip", + "label": "GT workspace arnimb_goethe03_1835" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_ocr.zip", + "label": "OCR workspace for arnimb_goethe03_1835" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_evaluation.zip", + "label": "Evaluation workspace for arnimb_goethe03_1835" + }, + "workflow_steps": [ { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -6779,7 +1004,7 @@ "document_metadata": { "data_properties": { 
"fonts": [ - "Black Letter" + "Fraktur" ], "publication_century": "1800-1900", "publication_decade": "", @@ -6791,112 +1016,67 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 12.164379999999998, - "cpu_time": 19.670232, - "cer_mean": 0.0056657223796034, - "cer_median": 0.0056657223796034, + "wall_time": 4.707975, + "cpu_time": 5.426059, + "cer_mean": 0.004721435316336166, + "cer_median": 0.004721435316336166, "cer_range": [ - 0.0056657223796034, - 0.0056657223796034 + 0.004721435316336166, + 0.004721435316336166 ], "cer_standard_deviation": null, - "wer": 0.031746031746031744, - "pages_per_minute": 4.932433876613524 + "wer": 0.015873015873015872, + "pages_per_minute": 12.744332754528221 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.0056657223796034, - "wer": 0.031746031746031744 + "cer": 0.004721435316336166, + "wer": 0.015873015873015872 } ] } }, { - "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr-eval", - "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr", + "eval_workflow_id": "wf-datablumenbach_anatomie_1805-eval", + "label": "Workflow on data blumenbach_anatomie_1805", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/blumenbach_anatomie_1805.ocrd.zip", + "label": "GT 
workspace blumenbach_anatomie_1805" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_ocr.zip", + "label": "OCR workspace for blumenbach_anatomie_1805" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_evaluation.zip", + "label": "Evaluation workspace for blumenbach_anatomie_1805" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": 
"Fraktur_GT4HistOCR", "dpi": 0, - "padding": 4, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, "raw_lines": false, "char_whitelist": "", "char_blacklist": "", @@ -6907,29 +1087,95 @@ "auto_model": false, "oem": "DEFAULT" } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 13.404813, + "cpu_time": 15.520204, + "cer_mean": 0.08328200324172261, + "cer_median": 0.08736842105263158, + "cer_range": [ + 0.04055496264674493, + 0.12192262602579132 + ], + "cer_standard_deviation": 0.04083746158658049, + "wer": 0.23519468186134854, + "pages_per_minute": 13.428012759297722 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.08736842105263158, + "wer": 0.22666666666666666 }, { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } + "page_id": "phys_0002", + "cer": 0.04055496264674493, + "wer": 0.14814814814814814 }, + { + "page_id": "phys_0003", + "cer": 0.12192262602579132, + "wer": 0.33076923076923076 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_title_pages-eval", + "label": "Workflow on data reichsanzeiger_title_pages", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_title_pages.ocrd.zip", + "label": "GT workspace reichsanzeiger_title_pages" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_ocr.zip", + "label": "OCR workspace for reichsanzeiger_title_pages" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_title_pages" + }, + "workflow_steps": [ { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -6949,63 +1195,62 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Black Letter" + "Antiqua" ], - "publication_century": "1600-1700", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "17th century", + "publication_year": "19th century", "number_of_pages": 5, - "layout": "complex" + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 51.051955, - "cpu_time": 79.096845, - "cer_mean": 0.16456973001701142, - "cer_median": 0.11229135053110774, + "wall_time": 470.537927, + "cpu_time": 603.540066, + "cer_mean": 0.5074594248191785, + "cer_median": 0.3389393598117977, "cer_range": [ - 0.05917159763313609, - 0.34146341463414637 + 0.09154875911296263, + 
1.0 ], - "cer_standard_deviation": 0.11554307430913685, - "wer": 0.34312458192490203, - "pages_per_minute": 5.876366536795701 + "cer_standard_deviation": 0.36783069273412317, + "wer": 0.630009265947987, + "pages_per_minute": 0.6375681593037663 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.05917159763313609, - "wer": 0.2463768115942029 + "page_id": "P_1881_115_0163", + "cer": 0.3377672361293631, + "wer": 0.4791666666666667 }, { - "page_id": "phys_0002", - "cer": 0.0913884007029877, - "wer": 0.28125 + "page_id": "P_1885_5_0054", + "cer": 0.09154875911296263, + "wer": 0.20966135458167331 }, { - "page_id": "phys_0003", - "cer": 0.34146341463414637, - "wer": 0.5774647887323944 + "page_id": "P_1887_134_0444", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_0004", - "cer": 0.11229135053110774, - "wer": 0.1693548387096774 + "page_id": "P_1916_169_0087", + "cer": 0.3389393598117977, + "wer": 0.5072414380644062 }, { - "page_id": "phys_0005", - "cer": 0.21853388658367912, - "wer": 0.4411764705882353 + "page_id": "P_1918_267_0129", + "cer": 0.769041769041769, + "wer": 0.953976870427189 } ] } }, { - "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_minimal_ocr-eval", - "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr", + "eval_workflow_id": "wf-dataluther_auszlegunge_1520-eval", + "label": "Workflow on data luther_auszlegunge_1520", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -7016,16 +1261,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luther_auszlegunge_1520.ocrd.zip", + "label": "GT workspace luther_auszlegunge_1520" }, "ocr_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_minimal_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_ocr.zip", + "label": "OCR workspace for luther_auszlegunge_1520" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_evaluation.zip", + "label": "Evaluation workspace for luther_auszlegunge_1520" }, "workflow_steps": [ { @@ -7061,144 +1306,84 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1500-1600", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 5, - "layout": "complex" + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 13.146098, - "cpu_time": 16.982357, - "cer_mean": 0.3024629978548877, - "cer_median": 0.2951219512195122, + "wall_time": 10.160861, + "cpu_time": 11.973182, + "cer_mean": 0.24855845660550213, + "cer_median": 0.24855845660550213, "cer_range": [ - 0.19271623672230653, - 0.44970414201183434 + 0.07865168539325842, + 0.4184652278177458 ], - "cer_standard_deviation": 0.10474043149462715, - "wer": 0.5195139299620842, - "pages_per_minute": 22.820459728810782 + "cer_standard_deviation": 0.2402844601873776, + "wer": 0.37300307219662054, + "pages_per_minute": 11.81002279235982 }, "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.44970414201183434, - "wer": 0.7101449275362319 - }, - { - "page_id": 
"phys_0002", - "cer": 0.2179261862917399, - "wer": 0.5208333333333334 - }, { "page_id": "phys_0003", - "cer": 0.2951219512195122, - "wer": 0.4647887323943662 - }, - { - "page_id": "phys_0004", - "cer": 0.19271623672230653, - "wer": 0.3870967741935484 + "cer": 0.4184652278177458, + "wer": 0.48333333333333334 }, { - "page_id": "phys_0005", - "cer": 0.35684647302904565, - "wer": 0.5147058823529411 + "page_id": "phys_0029", + "cer": 0.07865168539325842, + "wer": 0.2626728110599078 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_luz_blitz_1784_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_luz_blitz_1784_slower_processors_ocr", + "eval_workflow_id": "wf-datahuebner_handbuch_1696-eval", + "label": "Workflow on data huebner_handbuch_1696", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/huebner_handbuch_1696.ocrd.zip", + "label": "GT workspace huebner_handbuch_1696" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_ocr.zip", + "label": "OCR workspace for 
huebner_handbuch_1696" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_evaluation.zip", + "label": "Evaluation workspace for huebner_handbuch_1696" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "padding": 4, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, "raw_lines": false, "char_whitelist": "", "char_blacklist": "", @@ -7209,29 +1394,95 @@ "auto_model": false, "oem": "DEFAULT" } + } + ], + 
"workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 15.981865, + "cpu_time": 18.106327, + "cer_mean": 0.20627969569747484, + "cer_median": 0.08702290076335878, + "cer_range": [ + 0.08487084870848709, + 0.44694533762057875 + ], + "cer_standard_deviation": 0.20842533731220508, + "wer": 0.3271033769383102, + "pages_per_minute": 11.262765640931143 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.44694533762057875, + "wer": 0.6341463414634146 }, { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } + "page_id": "phys_0002", + "cer": 0.08702290076335878, + "wer": 0.1504424778761062 }, + { + "page_id": "phys_0003", + "cer": 0.08487084870848709, + "wer": 0.19672131147540983 + } + ] + } + }, + { + "eval_workflow_id": "wf-datann_besuch_1780-eval", + "label": "Workflow on data nn_besuch_1780", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/nn_besuch_1780.ocrd.zip", + "label": "GT workspace nn_besuch_1780" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_ocr.zip", + "label": "OCR workspace for nn_besuch_1780" + }, + "eval_workspace": { + "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_evaluation.zip", + "label": "Evaluation workspace for nn_besuch_1780" + }, + "workflow_steps": [ { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -7252,7 +1503,7 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1700-1800", "publication_decade": "", @@ -7264,173 +1515,105 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 64.58104600000001, - "cpu_time": 99.479389, - "cer_mean": 0.02547899799369996, - "cer_median": 0.027204160076875337, + "wall_time": 14.316583, + "cpu_time": 16.701302, + "cer_mean": 0.06315407734401027, + "cer_median": 0.026726016076928857, "cer_range": [ - 0.010332950631458095, - 0.03717472118959108 + 0.01878453038674033, + 0.18037974683544303 ], - "cer_standard_deviation": 0.011266997500136374, - "wer": 0.07714749104131867, - "pages_per_minute": 3.716260650222357 + "cer_standard_deviation": 0.07825196427362012, + "wer": 0.11662846322503488, + "pages_per_minute": 16.76377666374721 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.03717472118959108, - "wer": 0.10256410256410256 + "page_id": "phys_00001", + "cer": 0.18037974683544303, + "wer": 0.1896551724137931 }, { - "page_id": "phys_0002", - "cer": 0.025119617224880382, - "wer": 0.09565217391304348 + "page_id": "phys_00002", + "cer": 0.01878453038674033, + "wer": 0.08670520231213873 }, { - "page_id": "phys_0003", - "cer": 0.029288702928870293, - "wer": 0.07924528301886792 + "page_id": "phys_00003", + "cer": 
0.02505446623093682, + "wer": 0.08670520231213873 }, { - "page_id": "phys_0004", - "cer": 0.010332950631458095, - "wer": 0.0311284046692607 + "page_id": "phys_00004", + "cer": 0.028397565922920892, + "wer": 0.10344827586206896 } ] } }, { - "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr-eval", - "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr", + "eval_workflow_id": "wf-databallenstedt_delatio_1777-eval", + "label": "Workflow on data ballenstedt_delatio_1777", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", - "label": "GT workspace 19th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/ballenstedt_delatio_1777.ocrd.zip", + "label": "GT workspace ballenstedt_delatio_1777" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_ocr.zip", + "label": "OCR workspace for ballenstedt_delatio_1777" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 
19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_evaluation.zip", + "label": "Evaluation workspace for ballenstedt_delatio_1777" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": 
"qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1800-1900", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", + "publication_year": "18th century", "number_of_pages": 3, "layout": "simple" } @@ -7438,155 +1621,75 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 97.867795, - "cpu_time": 142.045967, - "cer_mean": 0.08457767160660273, - "cer_median": 0.07737397420867527, + "wall_time": 21.444519, + "cpu_time": 24.028484, + "cer_mean": 0.06438361522903834, + "cer_median": 0.03969957081545064, "cer_range": [ - 0.0416221985058698, - 0.13473684210526315 + 0.021764032073310423, + 0.13168724279835392 ], - "cer_standard_deviation": 0.046973440956685215, - "wer": 0.3081861348528015, - "pages_per_minute": 1.8392158523649174 + "cer_standard_deviation": 0.058972490200809365, + "wer": 0.16906902212925057, + "pages_per_minute": 8.393753200992757 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.13473684210526315, - "wer": 0.38666666666666666 + "page_id": "phys_00003", + "cer": 0.021764032073310423, + "wer": 0.12236286919831224 }, { - "page_id": "phys_0002", - "cer": 0.0416221985058698, - "wer": 
0.21481481481481482 + "page_id": "phys_00005", + "cer": 0.13168724279835392, + "wer": 0.2603305785123967 }, { - "page_id": "phys_0003", - "cer": 0.07737397420867527, - "wer": 0.3230769230769231 + "page_id": "phys_00010", + "cer": 0.03969957081545064, + "wer": 0.1245136186770428 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_complex_nn_besuch_1780_slower_processors_ocr", + "eval_workflow_id": "wf-datareichsanzeiger_many_ads-eval", + "label": "Workflow on data reichsanzeiger_many_ads", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_many_ads.ocrd.zip", + "label": "GT workspace reichsanzeiger_many_ads" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_ocr.zip", + "label": "OCR workspace for reichsanzeiger_many_ads" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_slower_processors_ocr_evaluation.zip", - 
"label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_many_ads" }, "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, 
"model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -7606,173 +1709,97 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Black Letter" + "Antiqua" ], - "publication_century": "1700-1800", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 49.367179, - "cpu_time": 71.436347, - "cer_mean": 0.07900170040044713, - "cer_median": 0.018462174141687326, + "wall_time": 344.194331, + "cpu_time": 401.989378, + "cer_mean": 1.3340240550306781, + "cer_median": 0.7412443874278384, "cer_range": [ - 0.013259668508287293, - 0.26582278481012656 + 0.3960932753867003, + 3.7402255639097746 ], - "cer_standard_deviation": 0.1245848990441196, - "wer": 0.13667696498571524, - "pages_per_minute": 4.8615295599531825 + "cer_standard_deviation": 1.378192503303974, + "wer": 1.8653811990174032, + "pages_per_minute": 0.8716006423708356 }, "by_page": [ { - "page_id": "phys_00001", - "cer": 0.26582278481012656, - "wer": 0.3620689655172414 + "page_id": "P_1871_155_0279", + "cer": 0.3960932753867003, + "wer": 0.49564980967917344 }, { - "page_id": "phys_00002", - "cer": 0.013259668508287293, - "wer": 0.057803468208092484 + "page_id": "P_1871_65_0045", + "cer": 0.7412443874278384, + "wer": 0.947814451382694 }, { - "page_id": "phys_00003", - "cer": 0.020697167755991286, - "wer": 0.06936416184971098 + "page_id": "P_1873_1_0017", + "cer": 0.5852251348300515, + "wer": 0.6788418708240535 }, { - "page_id": "phys_00004", - "cer": 0.016227180527383367, - "wer": 0.05747126436781609 + "page_id": "P_1881_1_0662", 
+ "cer": 3.7402255639097746, + "wer": 5.734011627906977 + }, + { + "page_id": "P_1883_55_0044", + "cer": 1.2073319135990264, + "wer": 1.4705882352941178 } ] } }, { - "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_slower_processors_ocr-eval", - "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_slower_processors_ocr", + "eval_workflow_id": "wf-databernd_lebensbeschreibung_1738-eval", + "label": "Workflow on data bernd_lebensbeschreibung_1738", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bernd_lebensbeschreibung_1738.ocrd.zip", + "label": "GT workspace bernd_lebensbeschreibung_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_ocr.zip", + "label": "OCR workspace for bernd_lebensbeschreibung_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 
17_frak_complex_huebner_handbuch_1696_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_evaluation.zip", + "label": "Evaluation workspace for bernd_lebensbeschreibung_1738" }, "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": 
"Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -7793,11 +1820,11 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", + "publication_year": "18th century", "number_of_pages": 3, "layout": "complex" } @@ -7805,40 +1832,40 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 61.992684, - "cpu_time": 83.585045, - "cer_mean": 0.20641950275814583, - "cer_median": 0.1099236641221374, + "wall_time": 8.577653, + "cpu_time": 10.790615, + "cer_mean": 0.1790109644215708, + "cer_median": 0.05504587155963303, "cer_range": [ - 0.09132841328413284, - 0.4180064308681672 + 0.008409785932721712, + 0.4735772357723577 ], - "cer_standard_deviation": 0.18347538513029096, - "wer": 0.3802990111723255, - "pages_per_minute": 2.903568427526061 + "cer_standard_deviation": 0.2561653709691831, + "wer": 0.2391410103864555, + "pages_per_minute": 20.984761216150854 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.4180064308681672, - "wer": 0.7073170731707317 + "cer": 0.4735772357723577, + "wer": 0.5882352941176471 }, { "page_id": "phys_0002", - "cer": 0.1099236641221374, - "wer": 0.24778761061946902 + "cer": 0.05504587155963303, + "wer": 0.09821428571428571 }, { "page_id": "phys_0003", - "cer": 0.09132841328413284, - "wer": 0.18579234972677597 + "cer": 0.008409785932721712, + "wer": 0.030973451327433628 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_minimal_ocr", + "eval_workflow_id": "wf-dataweigel_gnothi02_1618-eval", + "label": "Workflow on data weigel_gnothi02_1618", 
"metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -7849,16 +1876,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/weigel_gnothi02_1618.ocrd.zip", + "label": "GT workspace weigel_gnothi02_1618" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_ocr.zip", + "label": "OCR workspace for weigel_gnothi02_1618" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_evaluation.zip", + "label": "Evaluation workspace for weigel_gnothi02_1618" }, "workflow_steps": [ { @@ -7894,694 +1921,385 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1700-1800", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "18th century", + "publication_year": "17th century", "number_of_pages": 4, - "layout": "complex" + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 14.25996, - "cpu_time": 17.707908, - "cer_mean": 0.1347518651788604, - "cer_median": 0.14344023364180397, + "wall_time": 27.856097, + "cpu_time": 31.285033, + "cer_mean": 0.11399763421865425, + 
"cer_median": 0.09987627660053089, "cer_range": [ - 0.09893550407013149, - 0.15319148936170213 + 0.07531106745252128, + 0.18092691622103388 ], - "cer_standard_deviation": 0.02557054025937362, - "wer": 0.19715777371165608, - "pages_per_minute": 16.830341740089032 + "cer_standard_deviation": 0.047514032403792344, + "wer": 0.19377061794956923, + "pages_per_minute": 8.615708079994121 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.13375130616509928, - "wer": 0.2 + "cer": 0.18092691622103388, + "wer": 0.3282051282051282 }, { "page_id": "phys_0002", - "cer": 0.15319148936170213, - "wer": 0.22916666666666666 + "cer": 0.07531106745252128, + "wer": 0.15019762845849802 }, { "page_id": "phys_0003", - "cer": 0.09893550407013149, - "wer": 0.12598425196850394 + "cer": 0.11397849462365592, + "wer": 0.15934065934065933 }, { "page_id": "phys_0004", - "cer": 0.15312916111850866, - "wer": 0.23348017621145375 + "cer": 0.08577405857740586, + "wer": 0.13733905579399142 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr", + "eval_workflow_id": "wf-datalessing_menschengeschlecht_1780-eval", + "label": "Workflow on data lessing_menschengeschlecht_1780", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": 
"https://github.com/OCR-D/quiver-data/blob/main/lessing_menschengeschlecht_1780.ocrd.zip", + "label": "GT workspace lessing_menschengeschlecht_1780" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_ocr.zip", + "label": "OCR workspace for lessing_menschengeschlecht_1780" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_evaluation.zip", + "label": "Evaluation workspace for lessing_menschengeschlecht_1780" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 201, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - 
"operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 6, - "layout": "complex" + "number_of_pages": 1, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 161.89715999999999, - "cpu_time": 264.56102, - "cer_mean": 0.1953749490770702, - 
"cer_median": 0.1576184687985986, + "wall_time": 2.026755, + "cpu_time": 2.56115, + "cer_mean": 0.02493765586034913, + "cer_median": 0.02493765586034913, "cer_range": [ - 0.09627059843885516, - 0.3873684210526316 + 0.02493765586034913, + 0.02493765586034913 ], - "cer_standard_deviation": 0.11209255429867662, - "wer": 0.3314757763924516, - "pages_per_minute": 2.2236338179125563 + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 29.60397285315689 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.09627059843885516, - "wer": 0.13953488372093023 - }, - { - "page_id": "phys_0002", - "cer": 0.3873684210526316, - "wer": 0.6036585365853658 - }, - { - "page_id": "phys_0003", - "cer": 0.12221368178324366, - "wer": 0.25728155339805825 - }, - { - "page_id": "phys_0004", - "cer": 0.2595959595959596, - "wer": 0.36129032258064514 - }, - { - "page_id": "phys_0005", - "cer": 0.11377777777777778, - "wer": 0.28402366863905326 - }, - { - "page_id": "phys_0006", - "cer": 0.1930232558139535, - "wer": 0.34306569343065696 + "cer": 0.02493765586034913, + "wer": 0.09836065573770492 } ] } }, { - "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_selected_pages_ocr-eval", - "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_selected_pages_ocr", + "eval_workflow_id": "wf-dataalberti_pictura_1540-eval", + "label": "Workflow on data alberti_pictura_1540", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/alberti_pictura_1540.ocrd.zip", + "label": "GT workspace alberti_pictura_1540" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_ocr.zip", + "label": "OCR workspace for alberti_pictura_1540" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_evaluation.zip", + "label": "Evaluation workspace for alberti_pictura_1540" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - 
"id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1500-1600", "publication_decade": "", "publication_year": "16th century", "number_of_pages": 3, - "layout": "simple" + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 72.48043099999998, - "cpu_time": 107.649963, - "cer_mean": 
0.05488709037929677, - "cer_median": 0.03488372093023256, + "wall_time": 7.85773, + "cpu_time": 9.961092, + "cer_mean": 0.10240852523716282, + "cer_median": 0.10536980749746708, "cer_range": [ - 0.028395061728395062, - 0.10138248847926268 + 0.07124352331606218, + 0.1306122448979592 ], - "cer_standard_deviation": 0.04039668560556835, - "wer": 0.13745817563490756, - "pages_per_minute": 2.483428940978566 + "cer_standard_deviation": 0.02979493530847308, + "wer": 0.23466068901129858, + "pages_per_minute": 22.907379103125205 }, "by_page": [ { "page_id": "phys_0007", - "cer": 0.10138248847926268, - "wer": 0.17475728155339806 + "cer": 0.07124352331606218, + "wer": 0.2231404958677686 }, { - "page_id": "phys_0013", - "cer": 0.03488372093023256, - "wer": 0.13592233009708737 + "page_id": "phys_0008", + "cer": 0.10536980749746708, + "wer": 0.2484472049689441 }, { - "page_id": "phys_0014", - "cer": 0.028395061728395062, - "wer": 0.1016949152542373 + "page_id": "phys_0009", + "cer": 0.1306122448979592, + "wer": 0.2323943661971831 } ] } }, { - "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_selected_pages_ocr-eval", - "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr", + "eval_workflow_id": "wf-dataheyden_paedono_1548-eval", + "label": "Workflow on data heyden_paedono_1548", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter 
complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/heyden_paedono_1548.ocrd.zip", + "label": "GT workspace heyden_paedono_1548" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_ocr.zip", + "label": "OCR workspace for heyden_paedono_1548" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_evaluation.zip", + "label": "Evaluation workspace for heyden_paedono_1548" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - 
"min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1500-1600", "publication_decade": "", - "publication_year": "17th century", + "publication_year": "16th century", "number_of_pages": 3, - "layout": "complex" + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 182.12772, - "cpu_time": 235.173884, - "cer_mean": 0.2294897403822962, - 
"cer_median": 0.06717557251908397, + "wall_time": 8.303854, + "cpu_time": 10.292532, + "cer_mean": 0.07452119312897007, + "cer_median": 0.0629800307219662, "cer_range": [ - 0.061808118081180814, - 0.5594855305466238 + 0.037037037037037035, + 0.12354651162790697 ], - "cer_standard_deviation": 0.2857973382248686, - "wer": 0.36395117469236493, - "pages_per_minute": 0.9883174291096379 + "cer_standard_deviation": 0.044394494261965886, + "wer": 0.22683890077340793, + "pages_per_minute": 21.676681695029803 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.5594855305466238, - "wer": 0.8292682926829268 + "page_id": "phys_0007", + "cer": 0.0629800307219662, + "wer": 0.21359223300970873 }, { - "page_id": "phys_0002", - "cer": 0.06717557251908397, - "wer": 0.11504424778761062 + "page_id": "phys_0013", + "cer": 0.12354651162790697, + "wer": 0.33980582524271846 }, { - "page_id": "phys_0003", - "cer": 0.061808118081180814, - "wer": 0.14754098360655737 + "page_id": "phys_0014", + "cer": 0.037037037037037035, + "wer": 0.1271186440677966 } ] } }, { - "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr-eval", - "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr", + "eval_workflow_id": "wf-dataluz_blitz_1784-eval", + "label": "Workflow on data luz_blitz_1784", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th 
century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luz_blitz_1784.ocrd.zip", + "label": "GT workspace luz_blitz_1784" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_ocr.zip", + "label": "OCR workspace for luz_blitz_1784" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_evaluation.zip", + "label": "Evaluation workspace for luz_blitz_1784" }, "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - 
"block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -8602,52 +2320,57 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 57.283714, - "cpu_time": 83.17899299999999, - "cer_mean": 0.15501219595245352, - "cer_median": 0.11654526534859522, + "wall_time": 19.577649, + "cpu_time": 22.415525, + "cer_mean": 0.030367084502750087, + "cer_median": 0.02381735856100435, "cer_range": [ - 0.09618717504332755, - 0.2523041474654378 + 0.014354066985645933, + 0.05947955390334572 ], - "cer_standard_deviation": 0.08486993479509113, - "wer": 0.360948415946103, - "pages_per_minute": 3.142254358717034 + "cer_standard_deviation": 0.02030808283356641, + 
"wer": 0.08722583259487592, + "pages_per_minute": 12.258877457655922 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.11654526534859522, - "wer": 0.46496815286624205 + "cer": 0.05947955390334572, + "wer": 0.1794871794871795 }, { "page_id": "phys_0002", - "cer": 0.2523041474654378, - "wer": 0.4 + "cer": 0.014354066985645933, + "wer": 0.043478260869565216 }, { "page_id": "phys_0003", - "cer": 0.09618717504332755, - "wer": 0.21787709497206703 + "cer": 0.02869097429766886, + "wer": 0.07924528301886792 + }, + { + "page_id": "phys_0004", + "cer": 0.01894374282433984, + "wer": 0.04669260700389105 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr-eval", - "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr", + "eval_workflow_id": "wf-dataestor_rechtsgelehrsamkeit02_1758-eval", + "label": "Workflow on data estor_rechtsgelehrsamkeit02_1758", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -8658,16 +2381,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/estor_rechtsgelehrsamkeit02_1758.ocrd.zip", + "label": "GT workspace estor_rechtsgelehrsamkeit02_1758" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_ocr.zip", + "label": "OCR workspace for estor_rechtsgelehrsamkeit02_1758" }, "eval_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_evaluation.zip", + "label": "Evaluation workspace for estor_rechtsgelehrsamkeit02_1758" }, "workflow_steps": [ { @@ -8703,7 +2426,7 @@ "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], "publication_century": "1700-1800", "publication_decade": "", @@ -8715,8 +2438,8 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 18.505333, - "cpu_time": 22.105623, + "wall_time": 19.327455, + "cpu_time": 22.561449, "cer_mean": 0.11589408928020027, "cer_median": 0.10084584323499293, "cer_range": [ @@ -8725,7 +2448,7 @@ ], "cer_standard_deviation": 0.07259145757108061, "wer": 0.20102650242627845, - "pages_per_minute": 12.969234328287957 + "pages_per_minute": 12.417568686617043 }, "by_page": [ { @@ -8752,187 +2475,210 @@ } }, { - "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr-eval", - "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr", + "eval_workflow_id": "wf-datatrota_mordtbrenner_1540-eval", + "label": "Workflow on data trota_mordtbrenner_1540", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": 
"https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", - "label": "GT workspace 17th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/trota_mordtbrenner_1540.ocrd.zip", + "label": "GT workspace trota_mordtbrenner_1540" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_ocr.zip", + "label": "OCR workspace for trota_mordtbrenner_1540" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_evaluation.zip", + "label": "Evaluation workspace for trota_mordtbrenner_1540" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", + "id": "ocrd-tesserocr-recognize", "params": 
{ - "level-of-operation": "page", + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } - }, + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.192959, + "cpu_time": 8.533417, + "cer_mean": 0.043071000855431994, + "cer_median": 0.043071000855431994, + "cer_range": [ + 0.014285714285714285, + 0.0718562874251497 + ], + "cer_standard_deviation": 0.04070854266369089, + "wer": 0.10714905431199029, + "pages_per_minute": 16.682981232063188 + }, + "by_page": [ { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } + "page_id": "phys_0001", + "cer": 0.0718562874251497, + "wer": 0.19469026548672566 }, { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, + "page_id": "phys_0002", + "cer": 0.014285714285714285, + "wer": 0.0196078431372549 + } + 
] + } + }, + { + "eval_workflow_id": "wf-datajusti_abhandlung01_1758-eval", + "label": "Workflow on data justi_abhandlung01_1758", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/justi_abhandlung01_1758.ocrd.zip", + "label": "GT workspace justi_abhandlung01_1758" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_ocr.zip", + "label": "OCR workspace for justi_abhandlung01_1758" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_evaluation.zip", + "label": "Evaluation workspace for justi_abhandlung01_1758" + }, + "workflow_steps": [ { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": 
"qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", - "Black Letter" + "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 5, + "publication_year": "18th century", + "number_of_pages": 4, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 108.68203399999999, - "cpu_time": 154.24126800000005, - "cer_mean": 0.2924684311074824, - "cer_median": 0.2583479789103691, + "wall_time": 13.437096, + "cpu_time": 16.64254, + "cer_mean": 0.12942563348778183, + "cer_median": 0.13278777025964683, "cer_range": [ - 0.06676783004552352, - 0.7536585365853659 + 0.09893550407013149, + 0.15319148936170213 ], - "cer_standard_deviation": 0.2774715020464407, - "wer": 0.4982391606465789, - "pages_per_minute": 2.7603458360008246 + "cer_standard_deviation": 0.022501323128153037, + "wer": 0.19483806092245076, + "pages_per_minute": 17.861002109384348 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.08481262327416174, - "wer": 0.43478260869565216 + "cer": 0.13375130616509928, + "wer": 0.2 }, { "page_id": "phys_0002", - "cer": 0.2583479789103691, - "wer": 0.5625 + "cer": 0.15319148936170213, + "wer": 0.22916666666666666 }, { "page_id": "phys_0003", - "cer": 0.7536585365853659, - "wer": 0.9014084507042254 + "cer": 0.09893550407013149, + "wer": 0.12992125984251968 }, { "page_id": "phys_0004", - "cer": 0.06676783004552352, - "wer": 0.04838709677419355 - }, - { - "page_id": "phys_0005", - "cer": 0.2987551867219917, - "wer": 0.5441176470588235 + "cer": 0.1318242343541944, + "wer": 0.22026431718061673 } ] } }, { - "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_minimal_ocr-eval", - "label": "Workflow on data 
18_frak_complex_euler_rechenkunst01_1738_minimal_ocr", + "eval_workflow_id": "wf-datareichsanzeiger_random-eval", + "label": "Workflow on data reichsanzeiger_random", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -8943,16 +2689,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", - "label": "GT workspace 18th century Black letter complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_random.ocrd.zip", + "label": "GT workspace reichsanzeiger_random" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_ocr.zip", + "label": "OCR workspace for reichsanzeiger_random" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_random" }, "workflow_steps": [ { @@ -8987,61 +2733,60 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Black Letter" + "Antiqua" ], - "publication_century": "1700-1800", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "18th century", + "publication_year": "19th century", "number_of_pages": 6, - "layout": "complex" + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - 
"wall_time": 18.674437, - "cpu_time": 24.291901, - "cer_mean": 0.25835849983393794, - "cer_median": 0.230420483908856, + "wall_time": 636.322401, + "cpu_time": 852.529652, + "cer_mean": 1.1094634128044334, + "cer_median": 0.8993673021608106, "cer_range": [ - 0.08586296617519515, - 0.5747368421052632 + 0.30019453260980855, + 1.9778823058446757 ], - "cer_standard_deviation": 0.16915619139008603, - "wer": 0.37827288927088376, - "pages_per_minute": 19.277689603172508 + "cer_standard_deviation": 0.645494993792005, + "wer": 1.3304151477151094, + "pages_per_minute": 0.565750945486516 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.08586296617519515, - "wer": 0.10465116279069768 + "page_id": "P_1879_45_0344", + "cer": 0.9843462873477166, + "wer": 1.1759884281581485 }, { - "page_id": "phys_0002", - "cer": 0.5747368421052632, - "wer": 0.7987804878048781 + "page_id": "P_1885_5_0055", + "cer": 0.30019453260980855, + "wer": 0.40127817019845274 }, { - "page_id": "phys_0003", - "cer": 0.2767102229054573, - "wer": 0.3786407766990291 + "page_id": "P_1889_1_0018", + "cer": 0.7868184930272782, + "wer": 0.9704142011834319 }, { - "page_id": "phys_0004", - "cer": 0.22828282828282828, - "wer": 0.27741935483870966 + "page_id": "P_1891_33_0452", + "cer": 0.8143883169739047, + "wer": 1.112810707456979 }, { - "page_id": "phys_0005", - "cer": 0.152, - "wer": 0.27218934911242604 + "page_id": "P_1932_300_0488", + "cer": 1.9778823058446757, + "wer": 2.349493487698987 }, { - "page_id": "phys_0006", - "cer": 0.23255813953488372, - "wer": 0.43795620437956206 + "page_id": "P_1936_123_0292", + "cer": 1.7931505410232167, + "wer": 1.9725058915946583 } ] } diff --git a/default_data_sources.txt b/data_srcs/default_data_sources.txt similarity index 100% rename from default_data_sources.txt rename to data_srcs/default_data_sources.txt diff --git a/data_srcs/reichsanzeiger_full.txt b/data_srcs/reichsanzeiger_full.txt new file mode 100644 index 0000000..87ce334 --- /dev/null +++ 
b/data_srcs/reichsanzeiger_full.txt @@ -0,0 +1 @@ +https://github.com/UB-Mannheim/reichsanzeiger-gt \ No newline at end of file diff --git a/data_srcs/reichsanzeiger_many_ads.list b/data_srcs/reichsanzeiger_many_ads.list new file mode 100644 index 0000000..c5ef614 --- /dev/null +++ b/data_srcs/reichsanzeiger_many_ads.list @@ -0,0 +1,5 @@ +053-7972/0044.jp2&CVT=jpeg 1883_55_0044.jpg +041-7960/0662.jp2&CVT=jpeg 1881_1_0662.jpg +102-9983/0045.jp2&CVT=jpeg 1871_65_0045.jpg +101-9982/0017.jp2&CVT=jpeg 1873_1_0017.jpg +003-7922/0279.jp2&CVT=jpeg 1871_155_0279.jpg diff --git a/data_srcs/reichsanzeiger_random.list b/data_srcs/reichsanzeiger_random.list new file mode 100644 index 0000000..a488a19 --- /dev/null +++ b/data_srcs/reichsanzeiger_random.list @@ -0,0 +1,6 @@ +104-8023/0452.jp2&CVT=jpeg 1891_33_0452.jpg +090-8009/0018.jp2&CVT=jpeg 1889_1_0018.jpg +065-7984/0055.jp2&CVT=jpeg 1885_5_0055.jpg +093-9053/0488.jp2&CVT=jpeg 1932_300_0488.jpg +014-8455/0292.jp2&CVT=jpeg 1936_123_0292.jpg +035-7954/0344.jp2&CVT=jpeg 1879_45_0344.jpg diff --git a/data_srcs/reichsanzeiger_tables.list b/data_srcs/reichsanzeiger_tables.list new file mode 100644 index 0000000..6518229 --- /dev/null +++ b/data_srcs/reichsanzeiger_tables.list @@ -0,0 +1,5 @@ +093-9053/0488.jp2&CVT=jpeg 1932_300_0488.jpg +014-8455/0292.jp2&CVT=jpeg 1936_123_0292.jpg +076-9036/0019.jp2&CVT=jpeg 1929_250_0019.jpg +027-7946/0059.jp2&CVT=jpeg 1877_7_0059.jpg +053-7972/0044.jp2&CVT=jpeg 1883_55_0044.jpg diff --git a/data_srcs/reichsanzeiger_title_pages.list b/data_srcs/reichsanzeiger_title_pages.list new file mode 100644 index 0000000..3fd2509 --- /dev/null +++ b/data_srcs/reichsanzeiger_title_pages.list @@ -0,0 +1,5 @@ +044-7963/0163.jp2&CVT=jpeg 1881_115_0163.jpg +079-7998/0444.jp2&CVT=jpeg 1887_134_0444.jpg +153-9561/0087.jp2&CVT=jpeg 1916_169_0087.jpg +167-9449/0129.jp2&CVT=jpeg 1918_267_0129.jpg +065-7984/0054.jp2&CVT=jpeg 1885_5_0054.jpg diff --git a/docker-compose.yml b/docker-compose.yml index c4be3ef..b7ca20e 
100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,6 @@ services: - ./data:/app/data # this will write the results to your host system - ./gt:/app/gt # mount your modules, custom workflows and data here - - ./models:/usr/local/share/ocrd-resources/ - #- ./workflows:/app/workflows - # - ./workflows/ocrd_workflows:/app/workflows/ocrd_workflows + #- ./models/ocrd-tesserocr-recognize:/usr/local/share/tessdata/ + #- ./models/ocrd-calamari-recognize:/usr/local/share/ocrd-resources/ # - TODO/custom/data \ No newline at end of file diff --git a/prepare.sh b/prepare.sh deleted file mode 100644 index cd60dcf..0000000 --- a/prepare.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -mkdir gt - -while IFS= read -r URL; do - OWNER=$(echo "$URL" | cut -d'/' -f4) - REPO=$(echo "$URL" | cut -d'/' -f5) - if [[ ! -f gt/"$REPO".zip ]]; then - echo "Downloading $REPO …" - RESULT=$(curl -L \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/"$OWNER"/"$REPO"/releases/latest) - ZIP_URL=$(echo "$RESULT" | jq -r '.assets | .[].browser_download_url') - curl -L -o gt/"$REPO".zip "$ZIP_URL" - fi -done < default_data_sources.txt - -cd gt || exit -# the default data is structured like this: -# repository_name.zip -# |___ subordinate_work_1.zip -# |___ subordinate_work_2.zip -# |___ ... -# $ZIP refers to the release itself which is on level "repository_name.zip" -# the subordinate works are also OCR-D BagIts / zips. these are referred to by $INNER_ZIP. -for ZIP in *.zip; do - NAME=$(echo "$ZIP" | cut -d"." -f1) - echo "Processing $NAME" - unzip -qq -d "$NAME" "$ZIP" - mv "$NAME"/ocrdzip_out/* "$NAME" && rm -r "$NAME"/ocrdzip_out - for INNER_ZIP in "$NAME"/*.zip; do - echo "Dealing with inner zip files …" - INNER_ZIP_NAME=$(basename "$INNER_ZIP" .ocrd.zip) - unzip -qq -d "$NAME"/"$INNER_ZIP_NAME" "$INNER_ZIP" && rm "$INNER_ZIP" - - echo "Done." - done -done - -echo " … and ready to go!" 
diff --git a/scripts/convert-yml-to-json.py b/scripts/convert-yml-to-json.py
new file mode 100644
index 0000000..0ff8023
--- /dev/null
+++ b/scripts/convert-yml-to-json.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+# This script has been copied from https://github.com/OCR-D/spec/blob/master/scripts/yaml-to-json.py
+# Usage: convert-yml-to-json.py [--indent N] SRC DST -- loads the YAML file SRC and writes it as JSON to DST.
+from yaml import safe_load
+from json import dumps
+from click import command, argument, option
+
+@command()
+@option('--indent', default=2, type=int)
+@argument('src')
+@argument('dst')
+def cli(src, dst, indent):
+    kwargs = {}
+    if indent > 0:  # an indent <= 0 keeps the default (compact) output of json.dumps
+        kwargs['indent'] = indent
+    with open(src, 'r', encoding='utf-8') as f_in, open(dst, 'w', encoding='utf-8') as f_out:
+        ret = safe_load(f_in)
+        f_out.write(dumps(ret, **kwargs))
+
+if __name__ == '__main__':
+    cli()
\ No newline at end of file
diff --git a/scripts/prepare.sh b/scripts/prepare.sh
new file mode 100644
index 0000000..287c1e5
--- /dev/null
+++ b/scripts/prepare.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# Usage (from the project root, /app in the container): bash scripts/prepare.sh [ra-full]
+mkdir -p gt
+
+echo "Prepare OCR-D Ground Truth …"
+# every line of the source list is a GitHub repository URL (https://github.com/OWNER/REPO)
+while IFS= read -r URL || [[ -n "$URL" ]]; do
+    OWNER=$(echo "$URL" | cut -d'/' -f4)
+    REPO=$(echo "$URL" | cut -d'/' -f5)
+    if [[ ! -f gt/"$REPO".zip ]]; then
+        echo "Downloading $REPO …"
+        RESULT=$(curl -L \
+          -H "Accept: application/vnd.github+json" \
+          -H "X-GitHub-Api-Version: 2022-11-28" \
+          https://api.github.com/repos/"$OWNER"/"$REPO"/releases/latest)
+        ZIP_URL=$(echo "$RESULT" | jq -r '.assets | .[].browser_download_url')
+        curl -L -o gt/"$REPO".zip "$ZIP_URL"
+    fi
+done < data_srcs/default_data_sources.txt
+
+cd gt || exit
+# the default data is structured like this:
+# repository_name.zip
+#   |___ subordinate_work_1.zip
+#   |___ subordinate_work_2.zip
+#   |___ ...
+# $ZIP refers to the release itself which is on level "repository_name.zip"
+# the subordinate works are also OCR-D BagIts / zips. these are referred to by $INNER_ZIP.
+for ZIP in *.zip; do
+    NAME=$(echo "$ZIP" | cut -d"." -f1)
+    echo "Processing $NAME"
+    if [[ ! -d "$NAME" && "$NAME" != "reichsanzeiger-gt" ]]; then
+        unzip -qq -d "$NAME" "$ZIP"
+        mv "$NAME"/ocrdzip_out/* "$NAME" && rm -r "$NAME"/ocrdzip_out
+        for INNER_ZIP in "$NAME"/*.zip; do
+            echo "Dealing with inner zip files …"
+            INNER_ZIP_NAME=$(basename "$INNER_ZIP" .ocrd.zip)
+            unzip -qq -d "$NAME"/"$INNER_ZIP_NAME" "$INNER_ZIP" && rm "$INNER_ZIP"
+
+            echo "Recreate required directory structure for $INNER_ZIP_NAME."
+            mkdir "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
+            mv "$NAME"/"$INNER_ZIP_NAME"/data/OCR-* "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
+            mv "$NAME"/"$INNER_ZIP_NAME"/data/mets.xml "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"
+            cp "$NAME"/metadata.json "$NAME"/"$INNER_ZIP_NAME"/data/"$INNER_ZIP_NAME"/metadata.json
+
+            echo "Moving $INNER_ZIP_NAME higher in dir structure."
+            mv "$NAME"/"$INNER_ZIP_NAME" .
+            echo "Done."
+        done
+        rm -rf "$NAME"
+    fi
+done
+
+echo "Prepare Reichsanzeiger GT …"
+# with "ra-full" the complete Reichsanzeiger GT is set up, otherwise only the small sample sets
+if [[ $1 == "ra-full" ]]; then
+    echo "Preparing the full Reichsanzeiger GT."
+    # the working directory is still gt/, so the clone ends up in gt/reichsanzeiger-gt
+    if [ ! -d reichsanzeiger-gt ]; then
+        git clone https://github.com/UB-Mannheim/reichsanzeiger-gt
+    fi
+
+    RA_GT=/app/gt/reichsanzeiger-gt
+    DATA_DIR=$RA_GT/data
+    cd "$DATA_DIR" || exit
+
+    if [[ -d reichsanzeiger-1820-1939/OCR-D-IMG ]]; then
+        echo "Skip downloading Reichsanzeiger images."
+    else
+        bash download_images.sh
+    fi
+    cd reichsanzeiger-1820-1939 || exit
+    ocrd workspace init
+    mkdir -p OCR-D-IMG
+    cp ../images/* OCR-D-IMG
+    rm -rf ../images
+    rm -rf ../reichsanzeiger-1820-1939_with-TableRegion
+    [[ -d OCR-D-GT-SEG-LINE ]] || cp -r GT-PAGE OCR-D-GT-SEG-LINE  # a repeated cp -r would nest GT-PAGE inside
+
+    echo "Adding images to mets …"
+
+    FILEGRP="OCR-D-IMG"
+    EXT=".jpg"  # the actual extension of the image files
+    MEDIATYPE='image/jpeg'  # the actual media type of the image files
+    for i in "$FILEGRP"/*"$EXT"; do
+        BASE=$(basename "${i}" "$EXT")
+        ocrd workspace add -G "$FILEGRP" -i "${FILEGRP}_${BASE}" -g "P_${BASE}" -m "$MEDIATYPE" "${i}"
+    done
+
+    python3 /app/scripts/convert-yml-to-json.py --indent 2 "$RA_GT"/METADATA.yml "$RA_GT"/metadata.json
+
+    echo " … and ready to go!"
+
+else
+    echo "Prepare smaller sets of Reichsanzeiger GT."
+    cd /app || exit
+    bash /app/scripts/prepare_reichsanzeiger_sets.sh
+fi
diff --git a/scripts/prepare_reichsanzeiger_sets.sh b/scripts/prepare_reichsanzeiger_sets.sh
new file mode 100644
index 0000000..a5667f4
--- /dev/null
+++ b/scripts/prepare_reichsanzeiger_sets.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Builds one small OCR-D workspace per data_srcs/reichsanzeiger_*.list file; run from /app (uses bash arrays).
+echo "Download Reichsanzeiger GT repository."
+cd gt || exit
+if [ ! -d reichsanzeiger-gt ]; then
+    git clone https://github.com/UB-Mannheim/reichsanzeiger-gt
+fi
+
+cd .. || exit
+
+# each .list line reads "<remote image path> <local file name>"; the text after the space is the local name
+PREFIX="data_srcs"
+files=(
+    "$PREFIX"/reichsanzeiger_many_ads.list
+    "$PREFIX"/reichsanzeiger_random.list
+    "$PREFIX"/reichsanzeiger_tables.list
+    "$PREFIX"/reichsanzeiger_title_pages.list
+)
+
+for FILE in "${files[@]}"; do
+    NAME=$(basename "$FILE" .list)
+    if [ -d gt/"$NAME" ]; then
+        echo "Directory gt/$NAME already exists. Skipping download."
+    else
+        echo "Processing $FILE."
+        mkdir -p gt/"$NAME"/data/"$NAME"/OCR-D-IMG
+        mkdir -p gt/"$NAME"/data/"$NAME"/OCR-D-GT-SEG-LINE
+        # the download base URL is kept base64-encoded; $(...) strips the trailing newline of the decoded value
+        urlbase=$(echo "aHR0cHM6Ly9kaWdpLmJpYi51bmktbWFubmhlaW0uZGUvcmVpY2hzYW56ZWlnZXIuZmNnaT9GSUY9
+L3JlaWNoc2FuemVpZ2VyL2ZpbG0vCg==" | base64 -d)
+        while read -r line || [ -n "$line" ]; do
+            wget --limit-rate=500k "${urlbase}${line% *}" -O ./gt/"$NAME"/data/"$NAME"/OCR-D-IMG/"${line#* }"
+            IMG_NAME=$(basename "${line#* }" .jpg)
+            cp gt/reichsanzeiger-gt/data/reichsanzeiger-1820-1939/GT-PAGE/"$IMG_NAME".xml gt/"$NAME"/data/"$NAME"/OCR-D-GT-SEG-LINE/"$IMG_NAME".xml
+        done < "$FILE"
+    fi
+    # "ocrd workspace init" below runs inside data/$NAME, so that is where an existing mets.xml is found
+    if [ ! -f gt/"$NAME"/data/"$NAME"/mets.xml ]; then
+        echo "Preparing OCR-D workspace for $NAME."
+        cd gt/"$NAME"/data/"$NAME" || exit
+        ocrd workspace init
+        ocrd workspace set-id "$NAME"
+
+        FILEGRP="OCR-D-IMG"
+        FILEGRP_2="OCR-D-GT-SEG-LINE"
+        # add images to mets
+        EXT=".jpg"  # the actual extension of the image files
+        MEDIATYPE='image/jpeg'  # the actual media type of the image files
+        for i in "$FILEGRP"/*"$EXT"; do
+            BASE=$(basename "${i}" "$EXT")
+            ocrd workspace add -G "$FILEGRP" -i "${FILEGRP}_${BASE}" -g "P_${BASE}" -m "$MEDIATYPE" "${i}"
+        done
+
+        # add GT to mets
+        for i in "$FILEGRP_2"/*.xml; do
+            BASE=$(basename "${i}" ".xml")
+            ocrd workspace add -G "$FILEGRP_2" -i "${FILEGRP_2}_${BASE}" -g "P_${BASE}" -m text/xml "${i}"
+        done
+    fi
+    # absolute path: the working directory is the workspace whenever the block above has run
+    if [ ! -f /app/gt/"$NAME"/data/"$NAME"/metadata.json ]; then
+        cp /app/gt/reichsanzeiger-gt/METADATA.yml /app/gt/"$NAME"/data/"$NAME"/METADATA.yml
+        python3 /app/scripts/convert-yml-to-json.py --indent 2 /app/gt/"$NAME"/data/"$NAME"/METADATA.yml /app/gt/"$NAME"/data/"$NAME"/metadata.json
+    fi
+    cd /app || exit
+done
+
+echo "Preparation of Reichsanzeiger GT subsets done."
diff --git a/src/benchmark_extraction.py b/src/benchmark_extraction.py index 2dfe6c2..39b4b21 100644 --- a/src/benchmark_extraction.py +++ b/src/benchmark_extraction.py @@ -106,17 +106,8 @@ def get_eval_tool(mets_path: str) -> str: def get_gt_workspace(workspace_path: str) -> Dict[str, str]: current_workspace = get_workspace_name(workspace_path) - split_workspace_name = current_workspace.split('_') - workspace_name_wo_workflow = split_workspace_name[0] + '_' + split_workspace_name[1] + '_' + split_workspace_name[2] - font = '' - if split_workspace_name[1] == 'ant': - font = 'Antiqua' - elif split_workspace_name[1] == 'frak': - font = 'Fraktur' - else: - font = 'Font Mix' - url = 'https://github.com/OCR-D/quiver-data/blob/main/' + workspace_name_wo_workflow + '.ocrd.zip' - label = f'GT workspace {split_workspace_name[0]}th century {font} {split_workspace_name[2]} layout' + url = 'https://github.com/OCR-D/quiver-data/blob/main/' + current_workspace + '.ocrd.zip' + label = f'GT workspace {current_workspace}' return { '@id': url, 'label': label @@ -196,7 +187,8 @@ def get_nextflow_completed_process_file(workspace_path: str): return file def get_nextflow_time(workspace_path: str, time_type: str) -> float: - files = listdir(workspace_path) + highest_workspace_dir = '/'.join(workspace_path.split('/')[:-2]) + files = listdir(highest_workspace_dir) logs = [] for file in files: if '.command.log' in file: @@ -204,7 +196,7 @@ def get_nextflow_time(workspace_path: str, time_type: str) -> float: time_per_workflow_step = [] for log in logs: - with open(workspace_path + '/' + log, 'r', encoding='utf-8') as l: + with open(highest_workspace_dir + '/' + log, 'r', encoding='utf-8') as l: log_file = l.read() no_sec_s = re.search(rf'([0-9]+?\.[0-9]+?)s \({time_type}\)', log_file).group(1) time_per_workflow_step.append(float(no_sec_s)) diff --git a/workflows/execute_workflows.sh b/workflows/execute_workflows.sh index 5f3f59f..db8957d 100755 --- a/workflows/execute_workflows.sh +++ 
b/workflows/execute_workflows.sh @@ -39,9 +39,9 @@ convert_ocrd_wfs_to_NextFlow() { download_models() { echo "Download the necessary models if not available" - if [[ ! -f /usr/local/share/ocrd-resources/ocrd-tesserocr-recognize/ ]] + if [[ ! -d /usr/local/share/tessdata ]] then - mkdir -p /usr/local/share/ocrd-resources/ + #mkdir -p /usr/local/share/ocrd-resources/ ocrd resmgr download ocrd-tesserocr-recognize '*' fi if [[ ! -d /usr/local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0 ]] @@ -58,32 +58,15 @@ create_wf_specific_workspaces() { # create workspace for all OCR workflows. # each workflow has a separate workspace to work with. - echo "Create workflow specific workspaces for each dir in ./gt …" for DIR in "$ROOT"/gt/*/; do DIR_NAME=$(basename "$DIR") - if grep -q "multivolume work" <<< "$(cat $DIR/mets.xml)"; then - echo "$DIR_NAME is a multivolume work" - - for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf - do - WF_NAME=$(basename -s .txt.nf "$WORKFLOW") - for SUB_WORK in $DIR/*/; do - SUB_WORK_DIR_NAME=$(basename "$SUB_WORK") - TARGET="$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$SUB_WORK_DIR_NAME"_"$WF_NAME" - cp -r "$ROOT"/gt/"$DIR_NAME"/"$SUB_WORK_DIR_NAME" "$TARGET" - if [[ -f "$ROOT"/gt/"$DIR_NAME"/metadata.json ]]; then - cp -r "$ROOT"/gt/"$DIR_NAME"/metadata.json "$TARGET"/metadata.json - fi - cp "$WORKFLOW" "$TARGET"/data/ - done - - done - else + if [[ ! $DIR_NAME == "reichsanzeiger-gt" ]]; then + echo "Create workflow specific workspace for $DIR_NAME." for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf do WF_NAME=$(basename -s .txt.nf "$WORKFLOW") cp -r "$ROOT"/gt/"$DIR_NAME" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME" - cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"/ + cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"/data/*/ done fi done @@ -93,11 +76,10 @@ clean_up_tmp_dirs() { echo "Clean up intermediate dirs …" for DIR in "$WORKSPACE_DIR"/tmp/* do + echo "Cleaning up $DIR." 
DIR_NAME=$(basename "$DIR") mv "$DIR" "$WORKSPACE_DIR"/"$DIR_NAME" - cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME" - ls "$WORKSPACE_DIR"/"$DIR_NAME" - cp -r "$WORKSPACE_DIR"/"$DIR_NAME"/data/* "$WORKSPACE_DIR"/"$DIR_NAME"/ + cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME"/data/*/ done rm -rf "$WORKSPACE_DIR"/tmp @@ -114,16 +96,16 @@ execute_wfs_and_extract_benchmarks() { DIR_NAME=$(basename $WS_DIR) - run "$WS_DIR"/*ocr.txt.nf "$DIR_NAME" "$WS_DIR" - run "$WS_DIR"/*eval.txt.nf "$DIR_NAME" "$WS_DIR" + run "$WS_DIR"/data/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR" + run "$WS_DIR"/data/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR" # create a result JSON according to the specs echo "Get Benchmark JSON …" - quiver benchmarks-extraction "$WS_DIR" "$WORKFLOW" + quiver benchmarks-extraction "$WS_DIR"/data/* "$WORKFLOW" echo "Done." # move data to results dir - mv "$WS_DIR"/*.json "$WORKFLOW_DIR"/results + mv "$WS_DIR"/data/*/*.json "$WORKFLOW_DIR"/results fi done cd "$ROOT" || exit @@ -132,7 +114,7 @@ execute_wfs_and_extract_benchmarks() { adjust_workflow_settings() { # $1: $WORKFLOW # $2: $DIR_NAME - sed -i "s CURRENT app/workflows/workspaces/$2 g" "$1" + sed -i "s CURRENT app/workflows/workspaces/$2/data/*/ g" "$1" } rename_and_move_nextflow_result() { @@ -142,13 +124,13 @@ rename_and_move_nextflow_result() { WORKFLOW_NAME=$(basename -s .txt.nf "$1") rm "$WORKFLOW_DIR"/nf-results/*process_completed.json mv "$WORKFLOW_DIR"/nf-results/*_completed.json "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME"_completed.json - if [ $WORKFLOW_NAME != "dinglehopper_eval" ]; then + if [ "$WORKFLOW_NAME" != "dinglehopper_eval" ]; then for DIR in "$WORKSPACE_DIR"/work/* do - WORK_DIR_NAME=$(basename $DIR) - for SUB_WORK_DIR in $DIR/* + WORK_DIR_NAME=$(basename "$DIR") + for SUB_WORK_DIR in "$DIR"/* do - SUB_WORK_DIR_NAME=$(basename $SUB_WORK_DIR) + SUB_WORK_DIR_NAME=$(basename "$SUB_WORK_DIR") mv "$WORKSPACE_DIR"/work/"$WORK_DIR_NAME"/"$SUB_WORK_DIR_NAME"/.command.log 
"$WORKSPACE_DIR"/"$2"/"$WORK_DIR_NAME"_"$SUB_WORK_DIR_NAME".command.log done @@ -165,7 +147,7 @@ run() { adjust_workflow_settings "$1" "$2" nextflow run "$1" -with-weblog http://127.0.0.1:8000/nextflow/ rename_and_move_nextflow_result "$1" "$2" - save_workspaces "$3" "$2" "$1" + save_workspaces "$3"/data "$2" "$1" } save_workspaces() { @@ -173,7 +155,7 @@ save_workspaces() { # $2: $DIR_NAME # $3: $WORKFLOW echo "Zipping workspace $1" - ocrd zip bag -d $1 -i $1 $1 + ocrd zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME" WORKFLOW_NAME=$(basename -s .txt.nf "$3") mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip } -- GitLab