diff --git a/.gitignore b/.gitignore index 4da1598e9800433f085c4fabf79c2be8c5b16df3..77ec93a82bff41fe605047ced6a7df55ffe8a490 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ workflows/nf-results/* workflows/results workflows/ocrd-workflows/*.nf models -.idea \ No newline at end of file +.idea +gt/* diff --git a/Dockerfile b/Dockerfile index 02321b76f01e828150eeb5dfc9503c54592d221f..ae906dfc8312def615efdadc8008fa69256b3991 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,7 @@ WORKDIR /app COPY requirements.txt requirements.txt RUN apt install git +RUN apt install -y jq RUN apt-get update RUN apt-get install -y --fix-missing openjdk-11-jre @@ -14,7 +15,6 @@ COPY README.md README.md RUN git init RUN git submodule add https://github.com/MehmedGIT/OtoN_Converter submodules/oton -RUN git submodule add https://github.com/OCR-D/quiver-data submodules/quiver-data RUN git submodule update --init RUN cd submodules/oton && \ @@ -22,10 +22,13 @@ RUN cd submodules/oton && \ sed -i "s \$projectDir/ocrd-workspace/ $WORKSPACE_DIR/CURRENT/ g" oton/config.toml && \ pip install . +COPY prepare.sh prepare.sh +COPY default_data_sources.txt default_data_sources.txt + RUN pip3 install -r requirements.txt RUN pip3 install . RUN nextflow COPY workflows workflows -CMD [ "bash", "workflows/execute_workflows.sh" ] \ No newline at end of file +ENTRYPOINT [ "bash" ] \ No newline at end of file diff --git a/README.md b/README.md index 5f5f3f977b9a1f0a7866e33564b0665d67f98d27..98e95f0f07e5af4aea4ff5de44c4c8dd3653981c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ QuiVer Benchmarks is based on `ocrd/all:maximum` and has all OCR-D processors at - Docker >= 23.0.0 - [Docker Compose plugin](https://docs.docker.com/compose/install/linux/#install-using-the-repository) -To speed up QuiVer Benchmarks you can mount already downloaded text recognition models to `/usr/local/share/ocrd-resources/` in `docker compose.yml` by adding +To speed up QuiVer Benchmarks you can mount already downloaded text recognition models to `/usr/local/share/ocrd-resources/` in `docker-compose.yml` by adding ```yml - path/to/your/models:/usr/local/share/ocrd-resources/ @@ -22,9 +22,12 @@ Otherwise, the tool will download all `ocrd-tesserocr-recognize` models as well ## Usage -- clone this repository +- clone this repository and switch to the cloned directory - (optional) [customize](#custom-workflows-and-data) QuiVer Benchmarks according to your needs -- run `docker compose up --build` +- build the image with `docker compose build` +- spin up a container with `docker compose run -d app` +- run `docker compose exec app bash prepare.sh` +- run `docker compose exec app bash workflows/execute_workflows.sh` - the benchmarks and the evaluation results will be available at `data/workflows.json` on your host system ## Benchmarks Considered diff --git a/data/workflows.json b/data/workflows.json index aef35aa55c7f266422c4ea6df3eead048f1ba241..b377f0b605db2748574e2e5041fc832bcbc9e080 100644 --- a/data/workflows.json +++ b/data/workflows.json @@ -1,190 +1,108 @@ [ { - "eval_workflow_id": "wf-data16_frak_simple_selected_pages_ocr-eval", - "label": "Workflow on data 16_frak_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_minimal_ocr-eval", + "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_minimal_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", + "label": "GT workspace 16th century Antiqua complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_minimal_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_minimal_ocr" }, "workflow_steps": [ { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", - "params": { - "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-cis-ocropy-segment", - "params": { - "level-of-operation": "page", - "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-recognize", "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, - { - "id": "ocrd-calamari-recognize", - "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], "publication_century": "1500-1600", "publication_decade": "", "publication_year": "16th century", - "number_of_pages": 6, - "layout": "simple" + "number_of_pages": 3, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 249.623269, - "cpu_time": 342.24125499999997, - "cer_mean": 0.12233144923609811, - "cer_median": 0.0697989946429397, + "wall_time": 7.386463, + "cpu_time": 9.83532, + "cer_mean": 0.10240852523716282, + "cer_median": 0.10536980749746708, "cer_range": [ - 0.05172413793103448, - 0.38848920863309355 + 0.07124352331606218, + 0.1306122448979592 ], - "cer_standard_deviation": 0.13173038899014955, - "wer": 0.26886580164226287, - "pages_per_minute": 1.4421732454757652 + "cer_standard_deviation": 0.02979493530847308, + "wer": 0.23466068901129858, + "pages_per_minute": 24.368902951250146 }, "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.10179640718562874, - "wer": 0.3008849557522124 - }, - { - "page_id": "phys_0002", - "cer": 0.05238095238095238, - "wer": 0.1568627450980392 - }, - { - "page_id": "phys_0003", - "cer": 0.38848920863309355, - "wer": 0.44 - }, { "page_id": "phys_0007", - "cer": 0.06255144032921811, - "wer": 0.2766990291262136 + "cer": 0.07124352331606218, + "wer": 0.2231404958677686 }, { - "page_id": "phys_0021", - "cer": 0.05172413793103448, - "wer": 0.20833333333333334 + "page_id": "phys_0008", + "cer": 0.10536980749746708, + "wer": 0.2484472049689441 }, { - "page_id": "phys_0029", - "cer": 0.07704654895666131, - "wer": 0.2304147465437788 + "page_id": "phys_0009", + "cer": 0.1306122448979592, + "wer": 0.2323943661971831 } ] } }, { - "eval_workflow_id": "wf-data16_frak_simple_slower_processors_ocr-eval", - "label": "Workflow on data 16_frak_simple_slower_processors_ocr", + "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", @@ -195,16 +113,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", + "label": "GT workspace 18th century Black letter simple layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_slower_processors_ocr" }, "workflow_steps": [ { @@ -319,157 +237,217 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 6, + "publication_year": "18th century", + "number_of_pages": 1, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 93.86911700000002, - "cpu_time": 135.81944000000001, - "cer_mean": 0.23049280809064063, - "cer_median": 0.09185773074661964, + "wall_time": 8.055356999999999, + "cpu_time": 12.105611, + "cer_mean": 0.0199501246882793, + "cer_median": 0.0199501246882793, "cer_range": [ - 0.0658682634730539, - 0.9568345323741008 + 0.0199501246882793, + 0.0199501246882793 ], - "cer_standard_deviation": 0.3560945390007154, - "wer": 0.36710708997967917, - "pages_per_minute": 3.835127159020788 + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 7.44845945375233 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.0658682634730539, - "wer": 0.18584070796460178 - }, - { - "page_id": "phys_0002", - "cer": 0.08412698412698413, - "wer": 0.11764705882352941 - }, - { - "page_id": "phys_0003", - "cer": 0.9568345323741008, - "wer": 0.9666666666666667 - }, - { - "page_id": "phys_0007", - "cer": 0.09958847736625515, - "wer": 0.36893203883495146 - }, - { - "page_id": "phys_0021", - "cer": 0.10109717868338558, - "wer": 0.2916666666666667 - }, - { - "page_id": "phys_0029", - "cer": 0.0754414125200642, - "wer": 0.271889400921659 + "cer": 0.0199501246882793, + "wer": 0.09836065573770492 } ] } }, { - "eval_workflow_id": "wf-data19_frak_simple_minimal_ocr-eval", - "label": "Workflow on data 19_frak_simple_minimal_ocr", + "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_nn_besuch_1780_selected_pages_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_selected_pages_ocr" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Black Letter" + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" ], - "publication_century": "1800-1900", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 4.533288, - "cpu_time": 5.404413, - "cer_mean": 0.004721435316336166, - "cer_median": 0.004721435316336166, + "wall_time": 76.547632, + "cpu_time": 113.81064699999999, + "cer_mean": 0.05959514724403742, + "cer_median": 0.0324919239726542, "cer_range": [ - 0.004721435316336166, - 0.004721435316336166 + 0.008839779005524863, + 0.16455696202531644 ], - "cer_standard_deviation": null, - "wer": 0.015873015873015872, - "pages_per_minute": 13.23542647191178 + "cer_standard_deviation": 0.07087591559854228, + "wer": 0.13238323035014285, + "pages_per_minute": 3.1353027354262246 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.004721435316336166, - "wer": 0.015873015873015872 + "page_id": "phys_00001", + "cer": 0.16455696202531644, + "wer": 0.27586206896551724 + }, + { + "page_id": "phys_00002", + "cer": 0.008839779005524863, + "wer": 0.04046242774566474 + }, + { + "page_id": "phys_00003", + "cer": 0.030501089324618737, + "wer": 0.09826589595375723 + }, + { + "page_id": "phys_00004", + "cer": 0.034482758620689655, + "wer": 0.11494252873563218 } ] } }, { - "eval_workflow_id": "wf-data16_ant_simple_selected_pages_ocr-eval", - "label": "Workflow on data 16_ant_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -480,16 +458,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_selected_pages_ocr" }, "workflow_steps": [ { @@ -592,52 +570,58 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 63.38978499999999, - "cpu_time": 97.14254, - "cer_mean": 0.05488709037929677, - "cer_median": 0.03488372093023256, + "wall_time": 154.933488, + "cpu_time": 215.310644, + "cer_mean": 0.12432674382712249, + "cer_median": 0.12040572654031183, "cer_range": [ - 0.028395061728395062, - 0.10138248847926268 + 0.0718294051627385, + 0.18466611706512778 ], - "cer_standard_deviation": 0.04039668560556835, - "wer": 0.13745817563490756, - "pages_per_minute": 2.8395742310847094 + "cer_standard_deviation": 0.04665572742682036, + "wer": 0.18974770097390478, + "pages_per_minute": 1.5490518098966441 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.10138248847926268, - "wer": 0.17475728155339806 + "page_id": "phys_0001", + "cer": 0.12757201646090535, + "wer": 0.14035087719298245 }, { - "page_id": "phys_0013", - "cer": 0.03488372093023256, - "wer": 0.13592233009708737 + "page_id": "phys_0002", + "cer": 0.1132394366197183, + "wer": 0.1950354609929078 }, { - "page_id": "phys_0014", - "cer": 0.028395061728395062, - "wer": 0.1016949152542373 + "page_id": "phys_0003", + "cer": 0.0718294051627385, + "wer": 0.15087719298245614 + }, + { + "page_id": "phys_0004", + "cer": 0.18466611706512778, + "wer": 0.2727272727272727 } ] } }, { - "eval_workflow_id": "wf-data16_ant_simple_slower_processors_ocr-eval", - "label": "Workflow on data 16_ant_simple_slower_processors_ocr", + "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr-eval", + "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", @@ -648,16 +632,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", + "label": "GT workspace 18th century Font Mix complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_slower_processors_ocr" }, "workflow_steps": [ { @@ -772,274 +756,90 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 28.936798, - "cpu_time": 46.368261, - "cer_mean": 0.06721136853878373, - "cer_median": 0.055232558139534885, + "wall_time": 67.309474, + "cpu_time": 101.85330800000001, + "cer_mean": 0.16726583056278752, + "cer_median": 0.09637318392327315, "cer_range": [ - 0.03580246913580247, - 0.11059907834101383 + 0.03187250996015936, + 0.4444444444444444 ], - "cer_standard_deviation": 0.038810463938030185, - "wer": 0.1847677033624047, - "pages_per_minute": 6.220453278901141 + "cer_standard_deviation": 0.18889822286887584, + "wer": 0.28912998545359864, + "pages_per_minute": 3.565619900699269 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.11059907834101383, - "wer": 0.23300970873786409 + "page_id": "phys_0001", + "cer": 0.4444444444444444, + "wer": 0.6204379562043796 }, { - "page_id": "phys_0013", - "cer": 0.055232558139534885, - "wer": 0.1941747572815534 + "page_id": "phys_0002", + "cer": 0.03187250996015936, + "wer": 0.10919540229885058 }, { - "page_id": "phys_0014", - "cer": 0.03580246913580247, - "wer": 0.1271186440677966 + "page_id": "phys_0003", + "cer": 0.1266778523489933, + "wer": 0.2681564245810056 + }, + { + "page_id": "phys_0004", + "cer": 0.06606851549755302, + "wer": 0.15873015873015872 } ] } }, { - "eval_workflow_id": "wf-data18_frak_simple_minimal_ocr-eval", - "label": "Workflow on data 18_frak_simple_minimal_ocr", + "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_selected_pages_ocr" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Black Letter" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 1.900465, - "cpu_time": 2.519007, - "cer_mean": 0.02493765586034913, - "cer_median": 0.02493765586034913, - "cer_range": [ - 0.02493765586034913, - 0.02493765586034913 - ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 31.571220727558778 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.02493765586034913, - "wer": 0.09836065573770492 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_ant_simple_minimal_ocr-eval", - "label": "Workflow on data 19_ant_simple_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", - "label": "GT workspace 19th century Antiqua simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_ant_simple_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua" - ], - "publication_century": "1800-1900", - "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 12.639051, - "cpu_time": 15.064324, - "cer_mean": 0.08328200324172261, - "cer_median": 0.08736842105263158, - "cer_range": [ - 0.04055496264674493, - 0.12192262602579132 - ], - "cer_standard_deviation": 0.04083746158658049, - "wer": 0.23519468186134854, - "pages_per_minute": 14.241575573988902 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.08736842105263158, - "wer": 0.22666666666666666 - }, - { - "page_id": "phys_0002", - "cer": 0.04055496264674493, - "wer": 0.14814814814814814 - }, - { - "page_id": "phys_0003", - "cer": 0.12192262602579132, - "wer": 0.33076923076923076 - } - ] - } - }, - { - "eval_workflow_id": "wf-data19_frak_simple_slower_processors_ocr-eval", - "label": "Workflow on data 19_frak_simple_slower_processors_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_slower_processors_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_slower_processors_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" + "level-of-operation": "page" } }, { @@ -1061,11 +861,22 @@ } }, { - "id": "ocrd-cis-ocropy-denoise", + "id": "ocrd-skimage-binarize", "params": { + "method": "li", "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 } }, { @@ -1077,28 +888,22 @@ } }, { - "id": "ocrd-tesserocr-segment", + "id": "ocrd-cis-ocropy-segment", "params": { + "level-of-operation": "page", "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 } }, { @@ -1111,73 +916,72 @@ } }, { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-calamari-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1800-1900", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 13.078715, - "cpu_time": 20.989223000000003, - "cer_mean": 0.0056657223796034, - "cer_median": 0.0056657223796034, + "wall_time": 122.82167999999999, + "cpu_time": 173.67148600000002, + "cer_mean": 0.05991629368071161, + "cer_median": 0.0496849827491332, "cer_range": [ - 0.0056657223796034, - 0.0056657223796034 + 0.0019973368841544607, + 0.13829787234042554 ], - "cer_standard_deviation": null, - "wer": 0.031746031746031744, - "pages_per_minute": 4.58760665707602 + "cer_standard_deviation": 0.06406814571829976, + "wer": 0.09724037283059882, + "pages_per_minute": 1.954052411593784 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.0056657223796034, - "wer": 0.031746031746031744 + "cer": 0.013584117032392894, + "wer": 0.06666666666666667 + }, + { + "page_id": "phys_0002", + "cer": 0.13829787234042554, + "wer": 0.1875 + }, + { + "page_id": "phys_0003", + "cer": 0.08578584846587352, + "wer": 0.12598425196850394 + }, + { + "page_id": "phys_0004", + "cer": 0.0019973368841544607, + "wer": 0.00881057268722467 } ] } }, { - "eval_workflow_id": "wf-data18_frak_simple_selected_pages_ocr-eval", - "label": "Workflow on data 18_frak_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data18_frak_complex_luz_blitz_1784_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_luz_blitz_1784_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -1188,16 +992,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_selected_pages_ocr" }, "workflow_steps": [ { @@ -1300,42 +1104,164 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 24.445896000000005, - "cpu_time": 35.454705999999995, - "cer_mean": 0.02493765586034913, - "cer_median": 0.02493765586034913, + "wall_time": 145.735792, + "cpu_time": 208.46528999999998, + "cer_mean": 0.05256131079939043, + "cer_median": 0.029898228835687546, "cer_range": [ - 0.02493765586034913, - 0.02493765586034913 + 0.009184845005740528, + 0.1412639405204461 ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 2.4543997078282582 + "cer_standard_deviation": 0.06044359233653714, + "wer": 0.15936235092429332, + "pages_per_minute": 1.6468157664384877 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.02493765586034913, - "wer": 0.09836065573770492 - } - ] + "cer": 0.1412639405204461, + "wer": 0.38461538461538464 + }, + { + "page_id": "phys_0002", + "cer": 0.039473684210526314, + "wer": 0.13043478260869565 + }, + { + "page_id": "phys_0003", + "cer": 0.020322773460848775, + "wer": 0.06792452830188679 + }, + { + "page_id": "phys_0004", + "cer": 0.009184845005740528, + "wer": 0.054474708171206226 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_luz_blitz_1784_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_luz_blitz_1784_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 18.435259, + "cpu_time": 21.704126, + "cer_mean": 0.030367084502750087, + "cer_median": 0.02381735856100435, + "cer_range": [ + 0.014354066985645933, + 0.05947955390334572 + ], + "cer_standard_deviation": 0.02030808283356641, + "wer": 0.08722583259487592, + "pages_per_minute": 13.018531499882915 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.05947955390334572, + "wer": 0.1794871794871795 + }, + { + "page_id": "phys_0002", + "cer": 0.014354066985645933, + "wer": 0.043478260869565216 + }, + { + "page_id": "phys_0003", + "cer": 0.02869097429766886, + "wer": 0.07924528301886792 + }, + { + "page_id": "phys_0004", + "cer": 0.01894374282433984, + "wer": 0.04669260700389105 + } + ] } }, { - "eval_workflow_id": "wf-data18_frak_simple_slower_processors_ocr-eval", - "label": "Workflow on data 18_frak_simple_slower_processors_ocr", + "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr-eval", + "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", @@ -1346,16 +1272,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", - "label": "GT workspace 18th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_slower_processors_ocr" }, "workflow_steps": [ { @@ -1470,62 +1396,68 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1700-1800", + "publication_century": "1500-1600", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 1, + "publication_year": "16th century", + "number_of_pages": 2, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.974276000000001, - "cpu_time": 11.831112000000001, - "cer_mean": 0.0199501246882793, - "cer_median": 0.0199501246882793, + "wall_time": 36.087631, + "cpu_time": 47.946267999999996, + "cer_mean": 0.5161379724470825, + "cer_median": 0.5161379724470825, "cer_range": [ - 0.0199501246882793, - 0.0199501246882793 + 0.0754414125200642, + 0.9568345323741008 ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 7.524194045954767 + "cer_standard_deviation": 0.6232390519399567, + "wer": 0.6192780337941628, + "pages_per_minute": 3.3252390548994475 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.0199501246882793, - "wer": 0.09836065573770492 + "page_id": "phys_0003", + "cer": 0.9568345323741008, + "wer": 0.9666666666666667 + }, + { + "page_id": "phys_0029", + "cer": 0.0754414125200642, + "wer": 0.271889400921659 } ] } }, { - "eval_workflow_id": "wf-data18_fontmix_complex_selected_pages_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_selected_pages_ocr", + "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", - "label": "OCR Workflow selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 18_fontmix_complex_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_slower_processors_ocr" }, "workflow_steps": [ { @@ -1559,22 +1491,11 @@ } }, { - "id": "ocrd-skimage-binarize", - "params": { - "method": "li", - "level-of-operation": "page", - "dpi": 0, - "window_size": 301, - "k": 0.34 - } - }, - { - "id": "ocrd-skimage-denoise", + "id": "ocrd-cis-ocropy-denoise", "params": { "level-of-operation": "page", - "dpi": 0, - "protect": 0.0, - "maxsize": 1.0 + "noise_maxsize": 3.0, + "dpi": 0 } }, { @@ -1586,22 +1507,28 @@ } }, { - "id": "ocrd-cis-ocropy-segment", + "id": "ocrd-tesserocr-segment", "params": { - "level-of-operation": "page", "dpi": 0, - "maxcolseps": 20, - "maxseps": 20, - "maximages": 10, - "csminheight": 4, - "hlminwidth": 10, - "gap_height": 0.01, - "gap_width": 1.5, - "overwrite_order": true, - "overwrite_separators": true, - "overwrite_regions": true, - "overwrite_lines": true, - "spread": 2.4 + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } }, { @@ -1614,17 +1541,34 @@ } }, { - "id": "ocrd-calamari-recognize", + "id": "ocrd-tesserocr-recognize", "params": { - "checkpoint_dir": "qurator-gt4histocr-1.0", - "voter": "confidence_voter_default_ctc", - "textequiv_level": "line", - "glyph_conf_cutoff": 0.001 + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" } } ], - "workflow_model": "qurator-gt4histocr-1.0", - "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", "document_metadata": { "data_properties": { "fonts": [ @@ -1634,94 +1578,5365 @@ "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 4, + "number_of_pages": 3, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 155.643285, - "cpu_time": 267.033236, - "cer_mean": 0.140061110527823, - "cer_median": 0.06466848046246318, + "wall_time": 34.033219, + "cpu_time": 53.635996999999996, + "cer_mean": 0.06688670097710152, + "cer_median": 0.03746177370030581, "cer_range": [ - 0.028685258964143426, - 0.4022222222222222 + 0.0290519877675841, + 0.13414634146341464 ], - "cer_standard_deviation": 0.17581800739756628, - "wer": 0.23651027895776314, - "pages_per_minute": 1.541987500456573 + "cer_standard_deviation": 0.058400133164399716, + "wer": 0.12383493220296966, + "pages_per_minute": 5.2889501871686 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.4022222222222222, - "wer": 0.5474452554744526 + "cer": 0.13414634146341464, + "wer": 0.22058823529411764 }, { "page_id": "phys_0002", - "cer": 0.028685258964143426, - "wer": 0.09195402298850575 + "cer": 0.0290519877675841, + "wer": 0.05357142857142857 }, { "page_id": "phys_0003", - "cer": 0.07550335570469799, - "wer": 0.19553072625698323 - }, - { - "page_id": "phys_0004", - "cer": 0.053833605220228384, - "wer": 0.1111111111111111 + "cer": 0.03746177370030581, + "wer": 0.09734513274336283 } ] } }, { - "eval_workflow_id": "wf-data16_frak_simple_minimal_ocr-eval", - "label": "Workflow on data 16_frak_simple_minimal_ocr", + "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", - "label": "GT workspace 16th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_slower_processors_ocr" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 6, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 78.308453, + "cpu_time": 124.319609, + "cer_mean": 0.1500034071140843, + "cer_median": 0.12202020202020203, + "cer_range": [ + 0.03902862098872507, + 0.39473684210526316 + ], + "cer_standard_deviation": 0.12934283380557174, + "wer": 0.27891577144281926, + "pages_per_minute": 4.597204850924586 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.03902862098872507, + "wer": 0.0872093023255814 + }, + { + "page_id": "phys_0002", + "cer": 0.39473684210526316, + "wer": 0.6402439024390244 + }, + { + "page_id": "phys_0003", + "cer": 0.16756341275941583, + "wer": 0.3592233009708738 + }, + { + "page_id": "phys_0004", + "cer": 0.13737373737373737, + "wer": 0.2 + }, + { + "page_id": "phys_0005", + "cer": 0.10666666666666667, + "wer": 0.21893491124260356 + }, + { + "page_id": "phys_0006", + "cer": 0.05465116279069768, + "wer": 0.1678832116788321 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_minimal_ocr-eval", + "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 10.980085, + "cpu_time": 12.980711, + "cer_mean": 0.24855845660550213, + "cer_median": 0.24855845660550213, + "cer_range": [ + 0.07865168539325842, + 0.4184652278177458 + ], + "cer_standard_deviation": 0.2402844601873776, + "wer": 0.37300307219662054, + "pages_per_minute": 10.928877144393692 + }, + "by_page": [ + { + "page_id": "phys_0003", + "cer": 0.4184652278177458, + "wer": 0.48333333333333334 + }, + { + "page_id": "phys_0029", + "cer": 0.07865168539325842, + "wer": 0.2626728110599078 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_buerger_gedichte_1778_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 16.86402, + "cpu_time": 26.750532, + "cer_mean": 0.22786023044476886, + "cer_median": 0.22786023044476886, + "cer_range": [ + 0.053811659192825115, + 0.4019088016967126 + ], + "cer_standard_deviation": 0.24614184997615882, + "wer": 0.35125551082997897, + "pages_per_minute": 7.115741086644821 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4019088016967126, + "wer": 0.5673758865248227 + }, + { + "page_id": "phys_0002", + "cer": 0.053811659192825115, + "wer": 0.13513513513513514 + } + ] + } + }, + { + "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr-eval", + "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", + "label": "GT workspace 19th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Black Letter" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 45.820398, + "cpu_time": 66.91274899999999, + "cer_mean": 0.007554296506137866, + "cer_median": 0.007554296506137866, + "cer_range": [ + 0.007554296506137866, + 0.007554296506137866 + ], + "cer_standard_deviation": null, + "wer": 0.015873015873015872, + "pages_per_minute": 1.309460472167876 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.007554296506137866, + "wer": 0.015873015873015872 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_slower_processors_ocr-eval", + "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", + "label": "GT workspace 16th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 32.539348, + "cpu_time": 51.522006000000005, + "cer_mean": 0.06721136853878373, + "cer_median": 0.055232558139534885, + "cer_range": [ + 0.03580246913580247, + 0.11059907834101383 + ], + "cer_standard_deviation": 0.038810463938030185, + "wer": 0.1847677033624047, + "pages_per_minute": 5.5317641890058775 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.11059907834101383, + "wer": 0.23300970873786409 + }, + { + "page_id": "phys_0013", + "cer": 0.055232558139534885, + "wer": 0.1941747572815534 + }, + { + "page_id": "phys_0014", + "cer": 0.03580246913580247, + "wer": 0.1271186440677966 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_slower_processors_ocr-eval", + "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", + "label": "GT workspace 16th century Antiqua complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 30.780167000000002, + "cpu_time": 52.275692, + "cer_mean": 0.10363204260868718, + "cer_median": 0.11836734693877551, + "cer_range": [ + 0.046632124352331605, + 0.1458966565349544 + ], + "cer_standard_deviation": 0.05124654849483992, + "wer": 0.2145458690579216, + "pages_per_minute": 5.8479214878853645 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.046632124352331605, + "wer": 0.1652892561983471 + }, + { + "page_id": "phys_0008", + "cer": 0.1458966565349544, + "wer": 0.2670807453416149 + }, + { + "page_id": "phys_0009", + "cer": 0.11836734693877551, + "wer": 0.2112676056338028 + } + ] + } + }, + { + "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_minimal_ocr-eval", + "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", + "label": "GT workspace 19th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_minimal_ocr_ocr.zip", + "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Black Letter" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 4.437351, + "cpu_time": 5.324579, + "cer_mean": 0.004721435316336166, + "cer_median": 0.004721435316336166, + "cer_range": [ + 0.004721435316336166, + 0.004721435316336166 + ], + "cer_standard_deviation": null, + "wer": 0.015873015873015872, + "pages_per_minute": 13.521580780965943 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.004721435316336166, + "wer": 0.015873015873015872 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr-eval", + "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", + "label": "GT workspace 18th century Font Mix complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 20.134204, + "cpu_time": 23.74436, + "cer_mean": 0.20433470587543373, + "cer_median": 0.1700173533179325, + "cer_range": [ + 0.04063745019920319, + 0.43666666666666665 + ], + "cer_standard_deviation": 0.19097881579976753, + "wer": 0.31449770246107817, + "pages_per_minute": 11.920014319910536 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.43666666666666665, + "wer": 0.583941605839416 + }, + { + "page_id": "phys_0002", + "cer": 0.04063745019920319, + "wer": 0.10344827586206896 + }, + { + "page_id": "phys_0003", + "cer": 0.05536912751677853, + "wer": 0.18435754189944134 + }, + { + "page_id": "phys_0004", + "cer": 0.28466557911908646, + "wer": 0.3862433862433862 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_minimal_ocr-eval", + "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", + "label": "GT workspace 16th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 8.030865, + "cpu_time": 10.331791, + "cer_mean": 0.07452119312897007, + "cer_median": 0.0629800307219662, + "cer_range": [ + 0.037037037037037035, + 0.12354651162790697 + ], + "cer_standard_deviation": 0.044394494261965886, + "wer": 0.22683890077340793, + "pages_per_minute": 22.41352581571225 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.0629800307219662, + "wer": 0.21359223300970873 + }, + { + "page_id": "phys_0013", + "cer": 0.12354651162790697, + "wer": 0.33980582524271846 + }, + { + "page_id": "phys_0014", + "cer": 0.037037037037037035, + "wer": 0.1271186440677966 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr-eval", + "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", + "label": "GT workspace 18th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 73.616766, + "cpu_time": 113.02993400000001, + "cer_mean": 0.02808165896942696, + "cer_median": 0.02821869488536155, + "cer_range": [ + 0.020618556701030927, + 0.03540772532188841 + ], + "cer_standard_deviation": 0.007395536576593408, + "wer": 0.13838629881265765, + "pages_per_minute": 2.445095183887866 + }, + "by_page": [ + { + "page_id": "phys_00003", + "cer": 0.020618556701030927, + "wer": 0.11392405063291139 + }, + { + "page_id": "phys_00005", + "cer": 0.02821869488536155, + "wer": 0.16115702479338842 + }, + { + "page_id": "phys_00010", + "cer": 0.03540772532188841, + "wer": 0.14007782101167315 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr-eval", + "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 34.966049000000005, + "cpu_time": 54.605062, + "cer_mean": 0.10034282802482036, + "cer_median": 0.10034282802482036, + "cer_range": [ + 0.09958847736625515, + 0.10109717868338558 + ], + "cer_standard_deviation": 0.001066812932128006, + "wer": 0.33029935275080907, + "pages_per_minute": 3.4319004700817066 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.09958847736625515, + "wer": 0.36893203883495146 + }, + { + "page_id": "phys_0021", + "cer": 0.10109717868338558, + "wer": 0.2916666666666667 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 8.242131, + "cpu_time": 10.755872, + "cer_mean": 0.1790109644215708, + "cer_median": 0.05504587155963303, + "cer_range": [ + 0.008409785932721712, + 0.4735772357723577 + ], + "cer_standard_deviation": 0.2561653709691831, + "wer": 0.2391410103864555, + "pages_per_minute": 21.83901226515327 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4735772357723577, + "wer": 0.5882352941176471 + }, + { + "page_id": "phys_0002", + "cer": 0.05504587155963303, + "wer": 0.09821428571428571 + }, + { + "page_id": "phys_0003", + "cer": 0.008409785932721712, + "wer": 0.030973451327433628 + } + ] + } + }, + { + "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr-eval", + "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", + "label": "GT workspace 19th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_ant_simple_blumenbach_anatomie_1805_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 42.362522, + "cpu_time": 65.91597100000001, + "cer_mean": 0.08697690719872657, + "cer_median": 0.04421052631578947, + "cer_range": [ + 0.03735325506937033, + 0.17936694021101993 + ], + "cer_standard_deviation": 0.08008554296654442, + "wer": 0.2764482431149098, + "pages_per_minute": 4.249038808407111 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.04421052631578947, + "wer": 0.16666666666666666 + }, + { + "page_id": "phys_0002", + "cer": 0.03735325506937033, + "wer": 0.17037037037037037 + }, + { + "page_id": "phys_0003", + "cer": 0.17936694021101993, + "wer": 0.49230769230769234 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 55.553484, + "cpu_time": 89.664067, + "cer_mean": 0.06272817755704542, + "cer_median": 0.04686327354908531, + "cer_range": [ + 0.0039946737683089215, + 0.15319148936170213 + ], + "cer_standard_deviation": 0.07133216362479361, + "wer": 0.0881207064150021, + "pages_per_minute": 4.320161090166731 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0073145245559038665, + "wer": 0.02857142857142857 + }, + { + "page_id": "phys_0002", + "cer": 0.15319148936170213, + "wer": 0.20833333333333334 + }, + { + "page_id": "phys_0003", + "cer": 0.08641202254226675, + "wer": 0.10236220472440945 + }, + { + "page_id": "phys_0004", + "cer": 0.0039946737683089215, + "wer": 0.013215859030837005 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr-eval", + "label": "Workflow on data 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", + "label": "GT workspace 18th century Font Mix complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_fontmix_complex_benner_herrnhuterey04_1748_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 151.101655, + "cpu_time": 259.05950800000005, + "cer_mean": 0.140061110527823, + "cer_median": 0.06466848046246318, + "cer_range": [ + 0.028685258964143426, + 0.4022222222222222 + ], + "cer_standard_deviation": 0.17581800739756628, + "wer": 0.23651027895776314, + "pages_per_minute": 1.5883346876644073 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4022222222222222, + "wer": 0.5474452554744526 + }, + { + "page_id": "phys_0002", + "cer": 0.028685258964143426, + "wer": 0.09195402298850575 + }, + { + "page_id": "phys_0003", + "cer": 0.07550335570469799, + "wer": 0.19553072625698323 + }, + { + "page_id": "phys_0004", + "cer": 0.053833605220228384, + "wer": 0.1111111111111111 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr-eval", + "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", + "label": "GT workspace 18th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 1.751117, + "cpu_time": 2.32017, + "cer_mean": 0.02493765586034913, + "cer_median": 0.02493765586034913, + "cer_range": [ + 0.02493765586034913, + 0.02493765586034913 + ], + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 34.263844163468235 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.02493765586034913, + "wer": 0.09836065573770492 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr-eval", + "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_trota_mordtbrenner_1540_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 28.966472, + "cpu_time": 40.91749, + "cer_mean": 0.07499762380001901, + "cer_median": 0.07499762380001901, + "cer_range": [ + 0.0658682634730539, + 0.08412698412698413 + ], + "cer_standard_deviation": 0.012910865190184943, + "wer": 0.15174388339406558, + "pages_per_minute": 4.14272059089557 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0658682634730539, + "wer": 0.18584070796460178 + }, + { + "page_id": "phys_0002", + "cer": 0.08412698412698413, + "wer": 0.11764705882352941 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr-eval", + "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_trota_mordtbrenner_1540_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 108.32675099999999, + "cpu_time": 136.94418900000002, + "cer_mean": 0.07708867978329056, + "cer_median": 0.07708867978329056, + "cer_range": [ + 0.05238095238095238, + 0.10179640718562874 + ], + "cer_standard_deviation": 0.034942003187804015, + "wer": 0.2288738504251258, + "pages_per_minute": 1.1077596151665254 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.10179640718562874, + "wer": 0.3008849557522124 + }, + { + "page_id": "phys_0002", + "cer": 0.05238095238095238, + "wer": 0.1568627450980392 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_simple.ocrd.zip", + "label": "GT workspace 18th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_simple_lessing_menschengeschlecht_1780_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 23.314418, + "cpu_time": 34.297031000000004, + "cer_mean": 0.02493765586034913, + "cer_median": 0.02493765586034913, + "cer_range": [ + 0.02493765586034913, + 0.02493765586034913 + ], + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 2.5735148095912153 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.02493765586034913, + "wer": 0.09836065573770492 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr-eval", + "label": "Workflow on data 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_luther_auszlegunge_1520_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 106.636709, + "cpu_time": 141.711402, + "cer_mean": 0.23276787879487743, + "cer_median": 0.23276787879487743, + "cer_range": [ + 0.07704654895666131, + 0.38848920863309355 + ], + "cer_standard_deviation": 0.22022321660797936, + "wer": 0.33520737327188943, + "pages_per_minute": 1.125316048528842 + }, + "by_page": [ + { + "page_id": "phys_0003", + "cer": 0.38848920863309355, + "wer": 0.44 + }, + { + "page_id": "phys_0029", + "cer": 0.07704654895666131, + "wer": 0.2304147465437788 + } + ] + } + }, + { + "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr-eval", + "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", + "label": "GT workspace 17th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.886743, + "cpu_time": 10.460914, + "cer_mean": 0.08427970798416445, + "cer_median": 0.09331797235023041, + "cer_range": [ + 0.05025996533795494, + 0.10926118626430802 + ], + "cer_standard_deviation": 0.030521364398782277, + "wer": 0.1842438386542771, + "pages_per_minute": 22.82310961571843 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.10926118626430802, + "wer": 0.2229299363057325 + }, + { + "page_id": "phys_0002", + "cer": 0.09331797235023041, + "wer": 0.20689655172413793 + }, + { + "page_id": "phys_0003", + "cer": 0.05025996533795494, + "wer": 0.12290502793296089 + } + ] + } + }, + { + "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_minimal_ocr-eval", + "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", + "label": "GT workspace 19th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_minimal_ocr_ocr.zip", + "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_ant_simple_blumenbach_anatomie_1805_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 12.28958, + "cpu_time": 14.798878, + "cer_mean": 0.08328200324172261, + "cer_median": 0.08736842105263158, + "cer_range": [ + 0.04055496264674493, + 0.12192262602579132 + ], + "cer_standard_deviation": 0.04083746158658049, + "wer": 0.23519468186134854, + "pages_per_minute": 14.646554235376634 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.08736842105263158, + "wer": 0.22666666666666666 + }, + { + "page_id": "phys_0002", + "cer": 0.04055496264674493, + "wer": 0.14814814814814814 + }, + { + "page_id": "phys_0003", + "cer": 0.12192262602579132, + "wer": 0.33076923076923076 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_nn_besuch_1780_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 13.367254, + "cpu_time": 16.145112, + "cer_mean": 0.06315407734401027, + "cer_median": 0.026726016076928857, + "cer_range": [ + 0.01878453038674033, + 0.18037974683544303 + ], + "cer_standard_deviation": 0.07825196427362012, + "wer": 0.11662846322503488, + "pages_per_minute": 17.95432330379897 + }, + "by_page": [ + { + "page_id": "phys_00001", + "cer": 0.18037974683544303, + "wer": 0.1896551724137931 + }, + { + "page_id": "phys_00002", + "cer": 0.01878453038674033, + "wer": 0.08670520231213873 + }, + { + "page_id": "phys_00003", + "cer": 0.02505446623093682, + "wer": 0.08670520231213873 + }, + { + "page_id": "phys_00004", + "cer": 0.028397565922920892, + "wer": 0.10344827586206896 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_kistler_kraeuter_1500_minimal_ocr-eval", + "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 8.741153, + "cpu_time": 10.631666, + "cer_mean": 0.11307713146793608, + "cer_median": 0.11307713146793608, + "cer_range": [ + 0.09135802469135802, + 0.13479623824451412 + ], + "cer_standard_deviation": 0.03071545536606607, + "wer": 0.35497572815533984, + "pages_per_minute": 13.728166066879277 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.09135802469135802, + "wer": 0.33495145631067963 + }, + { + "page_id": "phys_0021", + "cer": 0.13479623824451412, + "wer": 0.375 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr-eval", + "label": "Workflow on data 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_kistler_kraeuter_1500_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 81.76006000000001, + "cpu_time": 115.33805, + "cer_mean": 0.05713778913012629, + "cer_median": 0.05713778913012629, + "cer_range": [ + 0.05172413793103448, + 0.06255144032921811 + ], + "cer_standard_deviation": 0.0076560589477130125, + "wer": 0.24251618122977348, + "pages_per_minute": 1.4677092947338832 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.06255144032921811, + "wer": 0.2766990291262136 + }, + { + "page_id": "phys_0021", + "cer": 0.05172413793103448, + "wer": 0.20833333333333334 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 63.120688, + "cpu_time": 99.46367699999999, + "cer_mean": 0.2886254685112176, + "cer_median": 0.2965426192770805, + "cer_range": [ + 0.07856341189674523, + 0.4828532235939643 + ], + "cer_standard_deviation": 0.204671750373206, + "wer": 0.4238352166683633, + "pages_per_minute": 3.802239924888018 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4828532235939643, + "wer": 0.5614035087719298 + }, + { + "page_id": "phys_0002", + "cer": 0.14873239436619717, + "wer": 0.2695035460992908 + }, + { + "page_id": "phys_0003", + "cer": 0.07856341189674523, + "wer": 0.22807017543859648 + }, + { + "page_id": "phys_0004", + "cer": 0.4443528441879637, + "wer": 0.6363636363636364 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_bernd_lebensbeschreibung_1738_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 81.051665, + "cpu_time": 123.14032399999999, + "cer_mean": 0.08296660948260859, + "cer_median": 0.03058103975535168, + "cer_range": [ + 0.02522935779816514, + 0.19308943089430894 + ], + "cer_standard_deviation": 0.09540669248903694, + "wer": 0.16153231203986018, + "pages_per_minute": 2.220805704608289 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.19308943089430894, + "wer": 0.38235294117647056 + }, + { + "page_id": "phys_0002", + "cer": 0.03058103975535168, + "wer": 0.05357142857142857 + }, + { + "page_id": "phys_0003", + "cer": 0.02522935779816514, + "wer": 0.048672566371681415 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_ant_complex_alberti_pictura_1540_selected_pages_ocr-eval", + "label": "Workflow on data 16_ant_complex_alberti_pictura_1540_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", + "label": "GT workspace 16th century Antiqua complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_complex_alberti_pictura_1540_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_alberti_pictura_1540_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_complex_alberti_pictura_1540_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 69.338131, + "cpu_time": 122.13893500000002, + "cer_mean": 0.11095656062741122, + "cer_median": 0.12244897959183673, + "cer_range": [ + 0.07700101317122594, + 0.133419689119171 + ], + "cer_standard_deviation": 0.02991360090611332, + "wer": 0.26341315349276156, + "pages_per_minute": 2.595974212226747 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.133419689119171, + "wer": 0.36363636363636365 + }, + { + "page_id": "phys_0008", + "cer": 0.07700101317122594, + "wer": 0.18012422360248448 + }, + { + "page_id": "phys_0009", + "cer": 0.12244897959183673, + "wer": 0.24647887323943662 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_buerger_gedichte_1778_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 46.097406, + "cpu_time": 67.55636299999999, + "cer_mean": 0.15032645549695894, + "cer_median": 0.15032645549695894, + "cer_range": [ + 0.04932735426008968, + 0.2513255567338282 + ], + "cer_standard_deviation": 0.14283429875667375, + "wer": 0.3456967605903776, + "pages_per_minute": 2.60318335482912 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.2513255567338282, + "wer": 0.475177304964539 + }, + { + "page_id": "phys_0002", + "cer": 0.04932735426008968, + "wer": 0.21621621621621623 + } + ] + } + }, + { + "eval_workflow_id": "wf-data16_frak_simple_trota_mordtbrenner_1540_minimal_ocr-eval", + "label": "Workflow on data 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_frak_simple.ocrd.zip", + "label": "GT workspace 16th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_frak_simple_trota_mordtbrenner_1540_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_frak_simple_trota_mordtbrenner_1540_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.257076, + "cpu_time": 8.706393, + "cer_mean": 0.043071000855431994, + "cer_median": 0.043071000855431994, + "cer_range": [ + 0.014285714285714285, + 0.0718562874251497 + ], + "cer_standard_deviation": 0.04070854266369089, + "wer": 0.10714905431199029, + "pages_per_minute": 16.53558540657422 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0718562874251497, + "wer": 0.19469026548672566 + }, + { + "page_id": "phys_0002", + "cer": 0.014285714285714285, + "wer": 0.0196078431372549 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr-eval", + "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", + "label": "GT workspace 18th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 189.99469000000002, + "cpu_time": 255.53890399999997, + "cer_mean": 0.03947936370768366, + "cer_median": 0.0376249265138154, + "cer_range": [ + 0.022336769759450172, + 0.05847639484978541 + ], + "cer_standard_deviation": 0.01814103989293333, + "wer": 0.15140780173936938, + "pages_per_minute": 0.947394898246893 + }, + "by_page": [ + { + "page_id": "phys_00003", + "cer": 0.022336769759450172, + "wer": 0.10970464135021098 + }, + { + "page_id": "phys_00005", + "cer": 0.0376249265138154, + "wer": 0.16942148760330578 + }, + { + "page_id": "phys_00010", + "cer": 0.05847639484978541, + "wer": 0.17509727626459143 + } + ] + } + }, + { + "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr-eval", + "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", + "label": "GT workspace 17th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_selected_pages_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 133, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "level-of-operation": "page", + "dpi": 0, + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 88.38021099999999, + "cpu_time": 127.13157100000001, + "cer_mean": 0.047703401203510305, + "cer_median": 0.04723502304147465, + "cer_range": [ + 0.04592720970537262, + 0.04994797086368366 + ], + "cer_standard_deviation": 0.002050893378518156, + "wer": 0.17277993391305632, + "pages_per_minute": 2.0366550154536296 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.04994797086368366, + "wer": 0.21019108280254778 + }, + { + "page_id": "phys_0002", + "cer": 0.04723502304147465, + "wer": 0.15172413793103448 + }, + { + "page_id": "phys_0003", + "cer": 0.04592720970537262, + "wer": 0.1564245810055866 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_buerger_gedichte_1778_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_buerger_gedichte_1778_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_buerger_gedichte_1778_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_buerger_gedichte_1778_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_buerger_gedichte_1778_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 6.016163, + "cpu_time": 7.291227, + "cer_mean": 0.10672693293515115, + "cer_median": 0.10672693293515115, + "cer_range": [ + 0.04484304932735426, + 0.16861081654294804 + ], + "cer_standard_deviation": 0.08751702749046443, + "wer": 0.23135901859306116, + "pages_per_minute": 19.94626807817541 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.16861081654294804, + "wer": 0.3546099290780142 + }, + { + "page_id": "phys_0002", + "cer": 0.04484304932735426, + "wer": 0.10810810810810811 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_ant_simple_ballenstedt_delatio_1777_minimal_ocr-eval", + "label": "Workflow on data 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_ant_simple.ocrd.zip", + "label": "GT workspace 18th century Antiqua simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_ant_simple_ballenstedt_delatio_1777_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_ant_simple_ballenstedt_delatio_1777_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 24.13474, + "cpu_time": 27.011507, + "cer_mean": 0.06438361522903834, + "cer_median": 0.03969957081545064, + "cer_range": [ + 0.021764032073310423, + 0.13168724279835392 + ], + "cer_standard_deviation": 0.058972490200809365, + "wer": 0.16906902212925057, + "pages_per_minute": 7.458128821773095 + }, + "by_page": [ + { + "page_id": "phys_00003", + "cer": 0.021764032073310423, + "wer": 0.12236286919831224 + }, + { + "page_id": "phys_00005", + "cer": 0.13168724279835392, + "wer": 0.2603305785123967 + }, + { + "page_id": "phys_00010", + "cer": 0.03969957081545064, + "wer": 0.1245136186770428 + } + ] + } + }, + { + "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_minimal_ocr-eval", + "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_minimal_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_huebner_handbuch_1696_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 16.954402, + "cpu_time": 19.205537, + "cer_mean": 0.20627969569747484, + "cer_median": 0.08702290076335878, + "cer_range": [ + 0.08487084870848709, + 0.44694533762057875 + ], + "cer_standard_deviation": 0.20842533731220508, + "wer": 0.3271033769383102, + "pages_per_minute": 10.616711813250621 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.44694533762057875, + "wer": 0.6341463414634146 + }, + { + "page_id": "phys_0002", + "cer": 0.08702290076335878, + "wer": 0.1504424778761062 + }, + { + "page_id": "phys_0003", + "cer": 0.08487084870848709, + "wer": 0.19672131147540983 + } + ] + } + }, + { + "eval_workflow_id": "wf-data19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr-eval", + "label": "Workflow on data 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", + "label": "GT workspace 19th century Black letter simple layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_frak_simple_arnimb_goethe03_1835_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Black Letter" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 12.164379999999998, + "cpu_time": 19.670232, + "cer_mean": 0.0056657223796034, + "cer_median": 0.0056657223796034, + "cer_range": [ + 0.0056657223796034, + 0.0056657223796034 + ], + "cer_standard_deviation": null, + "wer": 0.031746031746031744, + "pages_per_minute": 4.932433876613524 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0056657223796034, + "wer": 0.031746031746031744 + } + ] + } + }, + { + "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr-eval", + "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, "xpath_parameters": {}, "xpath_model": {}, "auto_model": false, @@ -1734,67 +6949,63 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 6, - "layout": "simple" + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 24.570796, - "cpu_time": 30.071177, - "cer_mean": 0.13490219630962338, - "cer_median": 0.08500485504230823, + "wall_time": 51.051955, + "cpu_time": 79.096845, + "cer_mean": 0.16456973001701142, + "cer_median": 0.11229135053110774, "cer_range": [ - 0.014285714285714285, - 0.4184652278177458 + 0.05917159763313609, + 0.34146341463414637 ], - "cer_standard_deviation": 0.14421604715728026, - "wer": 0.2783759515546502, - "pages_per_minute": 14.651539982668854 + "cer_standard_deviation": 0.11554307430913685, + "wer": 0.34312458192490203, + "pages_per_minute": 5.876366536795701 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.0718562874251497, - "wer": 0.19469026548672566 + "cer": 0.05917159763313609, + "wer": 0.2463768115942029 }, { "page_id": "phys_0002", - "cer": 0.014285714285714285, - "wer": 0.0196078431372549 + "cer": 0.0913884007029877, + "wer": 0.28125 }, { "page_id": "phys_0003", - "cer": 0.4184652278177458, - "wer": 0.48333333333333334 - }, - { - "page_id": "phys_0007", - "cer": 0.09135802469135802, - "wer": 0.33495145631067963 + "cer": 0.34146341463414637, + "wer": 0.5774647887323944 }, { - "page_id": "phys_0021", - "cer": 0.13479623824451412, - "wer": 0.375 + "page_id": "phys_0004", + "cer": 0.11229135053110774, + "wer": 0.1693548387096774 }, { - "page_id": "phys_0029", - "cer": 0.07865168539325842, - "wer": 0.2626728110599078 + "page_id": "phys_0005", + "cer": 0.21853388658367912, + "wer": 0.4411764705882353 } ] } }, { - "eval_workflow_id": "wf-data18_fontmix_complex_minimal_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_minimal_ocr", + "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_minimal_ocr-eval", + "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1805,16 +7016,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_minimal_ocr_ocr.zip", - "label": "OCR workspace for 18_fontmix_complex_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_minimal_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_minimal_ocr" }, "workflow_steps": [ { @@ -1852,90 +7063,175 @@ "Antiqua", "Black Letter" ], - "publication_century": "1700-1800", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, + "publication_year": "17th century", + "number_of_pages": 5, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 18.971287, - "cpu_time": 22.54346, - "cer_mean": 0.20433470587543373, - "cer_median": 0.1700173533179325, + "wall_time": 13.146098, + "cpu_time": 16.982357, + "cer_mean": 0.3024629978548877, + "cer_median": 0.2951219512195122, "cer_range": [ - 0.04063745019920319, - 0.43666666666666665 + 0.19271623672230653, + 0.44970414201183434 ], - "cer_standard_deviation": 0.19097881579976753, - "wer": 0.31449770246107817, - "pages_per_minute": 12.6506968135583 + "cer_standard_deviation": 0.10474043149462715, + "wer": 0.5195139299620842, + "pages_per_minute": 22.820459728810782 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.43666666666666665, - "wer": 0.583941605839416 + "cer": 0.44970414201183434, + "wer": 0.7101449275362319 }, { "page_id": "phys_0002", - "cer": 0.04063745019920319, - "wer": 0.10344827586206896 + "cer": 0.2179261862917399, + "wer": 0.5208333333333334 }, { "page_id": "phys_0003", - "cer": 0.05536912751677853, - "wer": 0.18435754189944134 + "cer": 0.2951219512195122, + "wer": 0.4647887323943662 }, { "page_id": "phys_0004", - "cer": 0.28466557911908646, - "wer": 0.3862433862433862 + "cer": 0.19271623672230653, + "wer": 0.3870967741935484 + }, + { + "page_id": "phys_0005", + "cer": 0.35684647302904565, + "wer": 0.5147058823529411 } ] } }, { - "eval_workflow_id": "wf-data16_ant_simple_minimal_ocr-eval", - "label": "Workflow on data 16_ant_simple_minimal_ocr", + "eval_workflow_id": "wf-data18_frak_complex_luz_blitz_1784_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_luz_blitz_1784_slower_processors_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", - "label": "GT workspace 16th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_luz_blitz_1784_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_luz_blitz_1784_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_luz_blitz_1784_slower_processors_ocr" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -1955,52 +7251,58 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.865648, - "cpu_time": 10.22416, - "cer_mean": 0.07452119312897007, - "cer_median": 0.0629800307219662, + "wall_time": 64.58104600000001, + "cpu_time": 99.479389, + "cer_mean": 0.02547899799369996, + "cer_median": 0.027204160076875337, "cer_range": [ - 0.037037037037037035, - 0.12354651162790697 + 0.010332950631458095, + 0.03717472118959108 ], - "cer_standard_deviation": 0.044394494261965886, - "wer": 0.22683890077340793, - "pages_per_minute": 22.88431925761234 + "cer_standard_deviation": 0.011266997500136374, + "wer": 0.07714749104131867, + "pages_per_minute": 3.716260650222357 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.0629800307219662, - "wer": 0.21359223300970873 + "page_id": "phys_0001", + "cer": 0.03717472118959108, + "wer": 0.10256410256410256 + }, + { + "page_id": "phys_0002", + "cer": 0.025119617224880382, + "wer": 0.09565217391304348 }, { - "page_id": "phys_0013", - "cer": 0.12354651162790697, - "wer": 0.33980582524271846 + "page_id": "phys_0003", + "cer": 0.029288702928870293, + "wer": 0.07924528301886792 }, { - "page_id": "phys_0014", - "cer": 0.037037037037037035, - "wer": 0.1271186440677966 + "page_id": "phys_0004", + "cer": 0.010332950631458095, + "wer": 0.0311284046692607 } ] } }, { - "eval_workflow_id": "wf-data19_ant_simple_selected_pages_ocr-eval", - "label": "Workflow on data 19_ant_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr-eval", + "label": "Workflow on data 19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -2015,12 +7317,12 @@ "label": "GT workspace 19th century Antiqua simple layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_ant_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 19_ant_simple_blumenbach_anatomie_1805_selected_pages_ocr" }, "workflow_steps": [ { @@ -2123,7 +7425,8 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], "publication_century": "1800-1900", "publication_decade": "", @@ -2135,8 +7438,8 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 113.83170799999999, - "cpu_time": 159.490409, + "wall_time": 97.867795, + "cpu_time": 142.045967, "cer_mean": 0.08457767160660273, "cer_median": 0.07737397420867527, "cer_range": [ @@ -2145,7 +7448,7 @@ ], "cer_standard_deviation": 0.046973440956685215, "wer": 0.3081861348528015, - "pages_per_minute": 1.5812817286375076 + "pages_per_minute": 1.8392158523649174 }, "by_page": [ { @@ -2167,8 +7470,8 @@ } }, { - "eval_workflow_id": "wf-data19_ant_simple_slower_processors_ocr-eval", - "label": "Workflow on data 19_ant_simple_slower_processors_ocr", + "eval_workflow_id": "wf-data18_frak_complex_nn_besuch_1780_slower_processors_ocr-eval", + "label": "Workflow on data 18_frak_complex_nn_besuch_1780_slower_processors_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", @@ -2179,16 +7482,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_ant_simple.ocrd.zip", - "label": "GT workspace 19th century Antiqua simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 19_ant_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_nn_besuch_1780_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_ant_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_ant_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_nn_besuch_1780_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_nn_besuch_1780_slower_processors_ocr" }, "workflow_steps": [ { @@ -2303,52 +7606,345 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1800-1900", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 49.367179, + "cpu_time": 71.436347, + "cer_mean": 0.07900170040044713, + "cer_median": 0.018462174141687326, + "cer_range": [ + 0.013259668508287293, + 0.26582278481012656 + ], + "cer_standard_deviation": 0.1245848990441196, + "wer": 0.13667696498571524, + "pages_per_minute": 4.8615295599531825 + }, + "by_page": [ + { + "page_id": "phys_00001", + "cer": 0.26582278481012656, + "wer": 0.3620689655172414 + }, + { + "page_id": "phys_00002", + "cer": 0.013259668508287293, + "wer": 0.057803468208092484 + }, + { + "page_id": "phys_00003", + "cer": 0.020697167755991286, + "wer": 0.06936416184971098 + }, + { + "page_id": "phys_00004", + "cer": 0.016227180527383367, + "wer": 0.05747126436781609 + } + ] + } + }, + { + "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_slower_processors_ocr-eval", + "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_slower_processors_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_slower_processors_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_huebner_handbuch_1696_slower_processors_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", "number_of_pages": 3, - "layout": "simple" + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 61.992684, + "cpu_time": 83.585045, + "cer_mean": 0.20641950275814583, + "cer_median": 0.1099236641221374, + "cer_range": [ + 0.09132841328413284, + 0.4180064308681672 + ], + "cer_standard_deviation": 0.18347538513029096, + "wer": 0.3802990111723255, + "pages_per_minute": 2.903568427526061 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4180064308681672, + "wer": 0.7073170731707317 + }, + { + "page_id": "phys_0002", + "cer": 0.1099236641221374, + "wer": 0.24778761061946902 + }, + { + "page_id": "phys_0003", + "cer": 0.09132841328413284, + "wer": 0.18579234972677597 + } + ] + } + }, + { + "eval_workflow_id": "wf-data18_frak_complex_justi_abhandlung01_1758_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_justi_abhandlung01_1758_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_justi_abhandlung01_1758_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_justi_abhandlung01_1758_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_justi_abhandlung01_1758_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Black Letter" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 47.942433, - "cpu_time": 73.79890900000001, - "cer_mean": 0.08697690719872657, - "cer_median": 0.04421052631578947, + "wall_time": 14.25996, + "cpu_time": 17.707908, + "cer_mean": 0.1347518651788604, + "cer_median": 0.14344023364180397, "cer_range": [ - 0.03735325506937033, - 0.17936694021101993 + 0.09893550407013149, + 0.15319148936170213 ], - "cer_standard_deviation": 0.08008554296654442, - "wer": 0.2764482431149098, - "pages_per_minute": 3.7545028221659087 + "cer_standard_deviation": 0.02557054025937362, + "wer": 0.19715777371165608, + "pages_per_minute": 16.830341740089032 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.04421052631578947, - "wer": 0.16666666666666666 + "cer": 0.13375130616509928, + "wer": 0.2 }, { "page_id": "phys_0002", - "cer": 0.03735325506937033, - "wer": 0.17037037037037037 + "cer": 0.15319148936170213, + "wer": 0.22916666666666666 }, { "page_id": "phys_0003", - "cer": 0.17936694021101993, - "wer": 0.49230769230769234 + "cer": 0.09893550407013149, + "wer": 0.12598425196850394 + }, + { + "page_id": "phys_0004", + "cer": 0.15312916111850866, + "wer": 0.23348017621145375 } ] } }, { - "eval_workflow_id": "wf-data19_frak_simple_selected_pages_ocr-eval", - "label": "Workflow on data 19_frak_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr-eval", + "label": "Workflow on data 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -2359,16 +7955,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/19_frak_simple.ocrd.zip", - "label": "GT workspace 19th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 19_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/19_frak_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 19_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_selected_pages_ocr" }, "workflow_steps": [ { @@ -2407,7 +8003,7 @@ "method": "li", "level-of-operation": "page", "dpi": 0, - "window_size": 301, + "window_size": 201, "k": 0.34 } }, @@ -2471,42 +8067,68 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1800-1900", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 6, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 50.242278999999996, - "cpu_time": 72.25566799999999, - "cer_mean": 0.007554296506137866, - "cer_median": 0.007554296506137866, + "wall_time": 161.89715999999999, + "cpu_time": 264.56102, + "cer_mean": 0.1953749490770702, + "cer_median": 0.1576184687985986, "cer_range": [ - 0.007554296506137866, - 0.007554296506137866 + 0.09627059843885516, + 0.3873684210526316 ], - "cer_standard_deviation": null, - "wer": 0.015873015873015872, - "pages_per_minute": 1.1942133437060052 + "cer_standard_deviation": 0.11209255429867662, + "wer": 0.3314757763924516, + "pages_per_minute": 2.2236338179125563 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.007554296506137866, - "wer": 0.015873015873015872 + "cer": 0.09627059843885516, + "wer": 0.13953488372093023 + }, + { + "page_id": "phys_0002", + "cer": 0.3873684210526316, + "wer": 0.6036585365853658 + }, + { + "page_id": "phys_0003", + "cer": 0.12221368178324366, + "wer": 0.25728155339805825 + }, + { + "page_id": "phys_0004", + "cer": 0.2595959595959596, + "wer": 0.36129032258064514 + }, + { + "page_id": "phys_0005", + "cer": 0.11377777777777778, + "wer": 0.28402366863905326 + }, + { + "page_id": "phys_0006", + "cer": 0.1930232558139535, + "wer": 0.34306569343065696 } ] } }, { - "eval_workflow_id": "wf-data17_frak_simple_selected_pages_ocr-eval", - "label": "Workflow on data 17_frak_simple_selected_pages_ocr", + "eval_workflow_id": "wf-data16_ant_simple_heyden_paedono_1548_selected_pages_ocr-eval", + "label": "Workflow on data 16_ant_simple_heyden_paedono_1548_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -2517,16 +8139,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_simple.ocrd.zip", + "label": "GT workspace 16th century Antiqua simple layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_simple_heyden_paedono_1548_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_simple_heyden_paedono_1548_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_simple_heyden_paedono_1548_selected_pages_ocr" }, "workflow_steps": [ { @@ -2565,7 +8187,7 @@ "method": "li", "level-of-operation": "page", "dpi": 0, - "window_size": 133, + "window_size": 301, "k": 0.34 } }, @@ -2629,172 +8251,73 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1600-1700", - "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, - "layout": "simple" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 74.155102, - "cpu_time": 112.343518, - "cer_mean": 0.047703401203510305, - "cer_median": 0.04723502304147465, - "cer_range": [ - 0.04592720970537262, - 0.04994797086368366 - ], - "cer_standard_deviation": 0.002050893378518156, - "wer": 0.17277993391305632, - "pages_per_minute": 2.427344783370401 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.04994797086368366, - "wer": 0.21019108280254778 - }, - { - "page_id": "phys_0002", - "cer": 0.04723502304147465, - "wer": 0.15172413793103448 - }, - { - "page_id": "phys_0003", - "cer": 0.04592720970537262, - "wer": 0.1564245810055866 - } - ] - } - }, - { - "eval_workflow_id": "wf-data16_ant_complex_minimal_ocr-eval", - "label": "Workflow on data 16_ant_complex_minimal_ocr", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_minimal_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_minimal_ocr" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_minimal_ocr" - }, - "workflow_steps": [ - { - "id": "ocrd-tesserocr-recognize", - "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", - "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua" - ], "publication_century": "1500-1600", "publication_decade": "", "publication_year": "16th century", "number_of_pages": 3, - "layout": "complex" + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.579529, - "cpu_time": 10.163754, - "cer_mean": 0.10240852523716282, - "cer_median": 0.10536980749746708, + "wall_time": 72.48043099999998, + "cpu_time": 107.649963, + "cer_mean": 0.05488709037929677, + "cer_median": 0.03488372093023256, "cer_range": [ - 0.07124352331606218, - 0.1306122448979592 + 0.028395061728395062, + 0.10138248847926268 ], - "cer_standard_deviation": 0.02979493530847308, - "wer": 0.23466068901129858, - "pages_per_minute": 23.74817749229537 + "cer_standard_deviation": 0.04039668560556835, + "wer": 0.13745817563490756, + "pages_per_minute": 2.483428940978566 }, "by_page": [ { "page_id": "phys_0007", - "cer": 0.07124352331606218, - "wer": 0.2231404958677686 + "cer": 0.10138248847926268, + "wer": 0.17475728155339806 }, { - "page_id": "phys_0008", - "cer": 0.10536980749746708, - "wer": 0.2484472049689441 + "page_id": "phys_0013", + "cer": 0.03488372093023256, + "wer": 0.13592233009708737 }, { - "page_id": "phys_0009", - "cer": 0.1306122448979592, - "wer": 0.2323943661971831 + "page_id": "phys_0014", + "cer": 0.028395061728395062, + "wer": 0.1016949152542373 } ] } }, { - "eval_workflow_id": "wf-data17_frak_simple_slower_processors_ocr-eval", - "label": "Workflow on data 17_frak_simple_slower_processors_ocr", + "eval_workflow_id": "wf-data17_frak_complex_huebner_handbuch_1696_selected_pages_ocr-eval", + "label": "Workflow on data 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_huebner_handbuch_1696_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_huebner_handbuch_1696_selected_pages_ocr" }, "workflow_steps": [ { @@ -2828,133 +8351,122 @@ } }, { - "id": "ocrd-cis-ocropy-denoise", + "id": "ocrd-skimage-binarize", "params": { + "method": "li", "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", "dpi": 0, - "min_orientation_confidence": 1.5 + "window_size": 301, + "k": 0.34 } }, { - "id": "ocrd-tesserocr-segment", + "id": "ocrd-skimage-denoise", "params": { + "level-of-operation": "page", "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "protect": 0.0, + "maxsize": 1.0 } }, { - "id": "ocrd-cis-ocropy-dewarp", + "id": "ocrd-tesserocr-deskew", "params": { + "operation_level": "page", "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 + "min_orientation_confidence": 1.5 } }, { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-segment", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, - "model": "Fraktur_GT4HistOCR", + "level-of-operation": "page", "dpi": 0, - "padding": 0, - "segmentation_level": "word", - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.6.2)", "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], "publication_century": "1600-1700", "publication_decade": "", "publication_year": "17th century", "number_of_pages": 3, - "layout": "simple" + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 45.753605, - "cpu_time": 66.773523, - "cer_mean": 0.15501219595245352, - "cer_median": 0.11654526534859522, + "wall_time": 182.12772, + "cpu_time": 235.173884, + "cer_mean": 0.2294897403822962, + "cer_median": 0.06717557251908397, "cer_range": [ - 0.09618717504332755, - 0.2523041474654378 + 0.061808118081180814, + 0.5594855305466238 ], - "cer_standard_deviation": 0.08486993479509113, - "wer": 0.360948415946103, - "pages_per_minute": 3.9341162297484535 + "cer_standard_deviation": 0.2857973382248686, + "wer": 0.36395117469236493, + "pages_per_minute": 0.9883174291096379 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.11654526534859522, - "wer": 0.46496815286624205 + "cer": 0.5594855305466238, + "wer": 0.8292682926829268 }, { "page_id": "phys_0002", - "cer": 0.2523041474654378, - "wer": 0.4 + "cer": 0.06717557251908397, + "wer": 0.11504424778761062 }, { "page_id": "phys_0003", - "cer": 0.09618717504332755, - "wer": 0.21787709497206703 + "cer": 0.061808118081180814, + "wer": 0.14754098360655737 } ] } }, { - "eval_workflow_id": "wf-data16_ant_complex_slower_processors_ocr-eval", - "label": "Workflow on data 16_ant_complex_slower_processors_ocr", + "eval_workflow_id": "wf-data17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr-eval", + "label": "Workflow on data 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", @@ -2965,16 +8477,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", + "label": "GT workspace 17th century Black letter simple layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_simple_calvi_beutelschneider01_1627_slower_processors_ocr" }, "workflow_steps": [ { @@ -3089,52 +8601,53 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "16th century", + "publication_year": "17th century", "number_of_pages": 3, - "layout": "complex" + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 32.228635, - "cpu_time": 53.661828, - "cer_mean": 0.10363204260868718, - "cer_median": 0.11836734693877551, + "wall_time": 57.283714, + "cpu_time": 83.17899299999999, + "cer_mean": 0.15501219595245352, + "cer_median": 0.11654526534859522, "cer_range": [ - 0.046632124352331605, - 0.1458966565349544 + 0.09618717504332755, + 0.2523041474654378 ], - "cer_standard_deviation": 0.05124654849483992, - "wer": 0.2145458690579216, - "pages_per_minute": 5.585095366279088 + "cer_standard_deviation": 0.08486993479509113, + "wer": 0.360948415946103, + "pages_per_minute": 3.142254358717034 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.046632124352331605, - "wer": 0.1652892561983471 + "page_id": "phys_0001", + "cer": 0.11654526534859522, + "wer": 0.46496815286624205 }, { - "page_id": "phys_0008", - "cer": 0.1458966565349544, - "wer": 0.2670807453416149 + "page_id": "phys_0002", + "cer": 0.2523041474654378, + "wer": 0.4 }, { - "page_id": "phys_0009", - "cer": 0.11836734693877551, - "wer": 0.2112676056338028 + "page_id": "phys_0003", + "cer": 0.09618717504332755, + "wer": 0.21787709497206703 } ] } }, { - "eval_workflow_id": "wf-data17_frak_simple_minimal_ocr-eval", - "label": "Workflow on data 17_frak_simple_minimal_ocr", + "eval_workflow_id": "wf-data18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -3145,16 +8658,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_simple.ocrd.zip", - "label": "GT workspace 17th century Black letter simple layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_minimal_ocr_ocr.zip", - "label": "OCR workspace for 17_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_simple_minimal_ocr_evaluation.zip", - "label": "Evaluation workspace for 17_frak_simple_minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_estor_rechtsgelehrsamkeit02_1758_minimal_ocr" }, "workflow_steps": [ { @@ -3189,52 +8702,58 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Black Letter" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.538366, - "cpu_time": 10.064678, - "cer_mean": 0.08427970798416445, - "cer_median": 0.09331797235023041, + "wall_time": 18.505333, + "cpu_time": 22.105623, + "cer_mean": 0.11589408928020027, + "cer_median": 0.10084584323499293, "cer_range": [ - 0.05025996533795494, - 0.10926118626430802 + 0.05331088664421998, + 0.2085737840065952 ], - "cer_standard_deviation": 0.030521364398782277, - "wer": 0.1842438386542771, - "pages_per_minute": 23.87785363565526 + "cer_standard_deviation": 0.07259145757108061, + "wer": 0.20102650242627845, + "pages_per_minute": 12.969234328287957 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.10926118626430802, - "wer": 0.2229299363057325 + "cer": 0.06310013717421124, + "wer": 0.11403508771929824 }, { "page_id": "phys_0002", - "cer": 0.09331797235023041, - "wer": 0.20689655172413793 + "cer": 0.13859154929577464, + "wer": 0.22340425531914893 }, { "page_id": "phys_0003", - "cer": 0.05025996533795494, - "wer": 0.12290502793296089 + "cer": 0.05331088664421998, + "wer": 0.13333333333333333 + }, + { + "page_id": "phys_0004", + "cer": 0.2085737840065952, + "wer": 0.3333333333333333 } ] } }, { - "eval_workflow_id": "wf-data16_ant_complex_selected_pages_ocr-eval", - "label": "Workflow on data 16_ant_complex_selected_pages_ocr", + "eval_workflow_id": "wf-data17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr-eval", + "label": "Workflow on data 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", @@ -3245,16 +8764,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", - "label": "GT workspace 16th century Antiqua complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/17_frak_complex.ocrd.zip", + "label": "GT workspace 17th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_selected_pages_ocr_ocr.zip", - "label": "OCR workspace for 16_ant_complex_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr_ocr.zip", + "label": "OCR workspace for 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_selected_pages_ocr_evaluation.zip", - "label": "Evaluation workspace for 16_ant_complex_selected_pages_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr_evaluation.zip", + "label": "Evaluation workspace for 17_frak_complex_silesius_seelenlust01_1657_selected_pages_ocr" }, "workflow_steps": [ { @@ -3357,167 +8876,98 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Black Letter" ], - "publication_century": "1500-1600", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, + "publication_year": "17th century", + "number_of_pages": 5, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 70.23542499999999, - "cpu_time": 122.07752, - "cer_mean": 0.11095656062741122, - "cer_median": 0.12244897959183673, + "wall_time": 108.68203399999999, + "cpu_time": 154.24126800000005, + "cer_mean": 0.2924684311074824, + "cer_median": 0.2583479789103691, "cer_range": [ - 0.07700101317122594, - 0.133419689119171 + 0.06676783004552352, + 0.7536585365853659 ], - "cer_standard_deviation": 0.02991360090611332, - "wer": 0.26341315349276156, - "pages_per_minute": 2.5628092945974204 + "cer_standard_deviation": 0.2774715020464407, + "wer": 0.4982391606465789, + "pages_per_minute": 2.7603458360008246 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.133419689119171, - "wer": 0.36363636363636365 + "page_id": "phys_0001", + "cer": 0.08481262327416174, + "wer": 0.43478260869565216 }, { - "page_id": "phys_0008", - "cer": 0.07700101317122594, - "wer": 0.18012422360248448 + "page_id": "phys_0002", + "cer": 0.2583479789103691, + "wer": 0.5625 }, { - "page_id": "phys_0009", - "cer": 0.12244897959183673, - "wer": 0.24647887323943662 + "page_id": "phys_0003", + "cer": 0.7536585365853659, + "wer": 0.9014084507042254 + }, + { + "page_id": "phys_0004", + "cer": 0.06676783004552352, + "wer": 0.04838709677419355 + }, + { + "page_id": "phys_0005", + "cer": 0.2987551867219917, + "wer": 0.5441176470588235 } ] } }, { - "eval_workflow_id": "wf-data18_fontmix_complex_slower_processors_ocr-eval", - "label": "Workflow on data 18_fontmix_complex_slower_processors_ocr", + "eval_workflow_id": "wf-data18_frak_complex_euler_rechenkunst01_1738_minimal_ocr-eval", + "label": "Workflow on data 18_frak_complex_euler_rechenkunst01_1738_minimal_ocr", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", - "label": "OCR Workflow slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_fontmix_complex.ocrd.zip", - "label": "GT workspace 18th century Font Mix complex layout" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/18_frak_complex.ocrd.zip", + "label": "GT workspace 18th century Black letter complex layout" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_slower_processors_ocr_ocr.zip", - "label": "OCR workspace for 18_fontmix_complex_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_minimal_ocr_ocr.zip", + "label": "OCR workspace for 18_frak_complex_euler_rechenkunst01_1738_minimal_ocr" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_fontmix_complex_slower_processors_ocr_evaluation.zip", - "label": "Evaluation workspace for 18_fontmix_complex_slower_processors_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/18_frak_complex_euler_rechenkunst01_1738_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 18_frak_complex_euler_rechenkunst01_1738_minimal_ocr" }, "workflow_steps": [ - { - "id": "ocrd-cis-ocropy-binarize", - "params": { - "method": "ocropy", - "threshold": 0.5, - "grayscale": false, - "maxskew": 0.0, - "noise_maxsize": 0, - "dpi": 0, - "level-of-operation": "page" - } - }, - { - "id": "ocrd-anybaseocr-crop", - "params": { - "dpi": 0, - "rulerRatioMax": 50.0, - "rulerRatioMin": 3.0, - "rulerAreaMax": 0.3, - "rulerAreaMin": 0.01, - "rulerWidthMax": 0.95, - "columnAreaMin": 0.05, - "columnSepWidthMax": 0.04, - "marginTop": 0.25, - "marginBottom": 0.75, - "marginLeft": 0.3, - "marginRight": 0.7, - "padding": 10 - } - }, - { - "id": "ocrd-cis-ocropy-denoise", - "params": { - "level-of-operation": "page", - "noise_maxsize": 3.0, - "dpi": 0 - } - }, - { - "id": "ocrd-tesserocr-deskew", - "params": { - "operation_level": "page", - "dpi": 0, - "min_orientation_confidence": 1.5 - } - }, - { - "id": "ocrd-tesserocr-segment", - "params": { - "dpi": 0, - "padding": 4, - "shrink_polygons": false, - "block_polygons": false, - "find_tables": true, - "find_staves": false, - "sparse_text": false, - "overwrite_segments": true, - "segmentation_level": "region", - "textequiv_level": "none", - "overwrite_text": true, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" - } - }, - { - "id": "ocrd-cis-ocropy-dewarp", - "params": { - "dpi": 0, - "range": 4.0, - "smoothness": 1.0, - "max_neighbour": 0.05 - } - }, { "id": "ocrd-tesserocr-recognize", "params": { - "textequiv_level": "glyph", - "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "segmentation_level": "word", + "overwrite_segments": false, "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, - "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -3543,45 +8993,55 @@ "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 4, + "number_of_pages": 6, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 72.53919699999999, - "cpu_time": 109.473651, - "cer_mean": 0.16726583056278752, - "cer_median": 0.09637318392327315, + "wall_time": 18.674437, + "cpu_time": 24.291901, + "cer_mean": 0.25835849983393794, + "cer_median": 0.230420483908856, "cer_range": [ - 0.03187250996015936, - 0.4444444444444444 + 0.08586296617519515, + 0.5747368421052632 ], - "cer_standard_deviation": 0.18889822286887584, - "wer": 0.28912998545359864, - "pages_per_minute": 3.3085560624554478 + "cer_standard_deviation": 0.16915619139008603, + "wer": 0.37827288927088376, + "pages_per_minute": 19.277689603172508 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.4444444444444444, - "wer": 0.6204379562043796 + "cer": 0.08586296617519515, + "wer": 0.10465116279069768 }, { "page_id": "phys_0002", - "cer": 0.03187250996015936, - "wer": 0.10919540229885058 + "cer": 0.5747368421052632, + "wer": 0.7987804878048781 }, { "page_id": "phys_0003", - "cer": 0.1266778523489933, - "wer": 0.2681564245810056 + "cer": 0.2767102229054573, + "wer": 0.3786407766990291 }, { "page_id": "phys_0004", - "cer": 0.06606851549755302, - "wer": 0.15873015873015872 + "cer": 0.22828282828282828, + "wer": 0.27741935483870966 + }, + { + "page_id": "phys_0005", + "cer": 0.152, + "wer": 0.27218934911242604 + }, + { + "page_id": "phys_0006", + "cer": 0.23255813953488372, + "wer": 0.43795620437956206 } ] } diff --git a/default_data_sources.txt b/default_data_sources.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9b1ddd6ce17bbc7217efee73c9ee4885ccfb031 --- /dev/null +++ b/default_data_sources.txt @@ -0,0 +1,12 @@ +https://github.com/tboenig/16_frak_simple +https://github.com/tboenig/17_frak_simple +https://github.com/tboenig/17_frak_complex +https://github.com/tboenig/18_frak_simple +https://github.com/tboenig/18_frak_complex +https://github.com/tboenig/19_frak_simple +https://github.com/tboenig/16_ant_simple +https://github.com/tboenig/16_ant_complex +https://github.com/tboenig/18_ant_simple +https://github.com/tboenig/19_ant_simple +https://github.com/tboenig/17_fontmix_simple +https://github.com/tboenig/18_fontmix_complex diff --git a/docker-compose.yml b/docker-compose.yml index 21b37d5d5ef6ec0f2cc52a8b36d680199fc87470..c4be3efcab37ce0cfade9e4bf3683676c53c90fa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,16 @@ version: '3.4' services: - benchmarks: + app: build: context: . dockerfile: Dockerfile working_dir: /app volumes: - ./data:/app/data # this will write the results to your host system + - ./gt:/app/gt # mount your modules, custom workflows and data here - # - /path/to/models/on/host:/usr/local/share/ocrd-resources/ + - ./models:/usr/local/share/ocrd-resources/ + #- ./workflows:/app/workflows # - ./workflows/ocrd_workflows:/app/workflows/ocrd_workflows # - TODO/custom/data \ No newline at end of file diff --git a/prepare.sh b/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..cd60dcfa37f82000f312481a917e4ff01a0e6467 --- /dev/null +++ b/prepare.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +mkdir gt + +while IFS= read -r URL; do + OWNER=$(echo "$URL" | cut -d'/' -f4) + REPO=$(echo "$URL" | cut -d'/' -f5) + if [[ ! -f gt/"$REPO".zip ]]; then + echo "Downloading $REPO …" + RESULT=$(curl -L \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + https://api.github.com/repos/"$OWNER"/"$REPO"/releases/latest) + ZIP_URL=$(echo "$RESULT" | jq -r '.assets | .[].browser_download_url') + curl -L -o gt/"$REPO".zip "$ZIP_URL" + fi +done < default_data_sources.txt + +cd gt || exit +# the default data is structured like this: +# repository_name.zip +# |___ subordinate_work_1.zip +# |___ subordinate_work_2.zip +# |___ ... +# $ZIP refers to the release itself which is on level "repository_name.zip" +# the subordinate works are also OCR-D BagIts / zips. these are referred to by $INNER_ZIP. +for ZIP in *.zip; do + NAME=$(echo "$ZIP" | cut -d"." -f1) + echo "Processing $NAME" + unzip -qq -d "$NAME" "$ZIP" + mv "$NAME"/ocrdzip_out/* "$NAME" && rm -r "$NAME"/ocrdzip_out + for INNER_ZIP in "$NAME"/*.zip; do + echo "Dealing with inner zip files …" + INNER_ZIP_NAME=$(basename "$INNER_ZIP" .ocrd.zip) + unzip -qq -d "$NAME"/"$INNER_ZIP_NAME" "$INNER_ZIP" && rm "$INNER_ZIP" + + echo "Done." + done +done + +echo " … and ready to go!" diff --git a/src/benchmark_extraction.py b/src/benchmark_extraction.py index 728b1b965b50af7b2e1b31a5fc911ff14046839e..2dfe6c2c4d29671f20ef6de3471455adf7d9cd98 100644 --- a/src/benchmark_extraction.py +++ b/src/benchmark_extraction.py @@ -8,7 +8,6 @@ from os import listdir, scandir from statistics import stdev, median from typing import Any, Dict, List, Union -import yaml from .constants import METS, RESULTS, QUIVER_MAIN, OCRD @@ -113,7 +112,7 @@ def get_gt_workspace(workspace_path: str) -> Dict[str, str]: if split_workspace_name[1] == 'ant': font = 'Antiqua' elif split_workspace_name[1] == 'frak': - font = 'Black letter' + font = 'Fraktur' else: font = 'Font Mix' url = 'https://github.com/OCR-D/quiver-data/blob/main/' + workspace_name_wo_workflow + '.ocrd.zip' @@ -134,15 +133,15 @@ def get_document_metadata(workspace_path: str) -> Dict[str, Dict[str, str]]: 'layout': '' } } - with open(workspace_path + '/METADATA.yml', 'r', encoding='utf-8') as file: - metadata = yaml.safe_load(file) + with open(workspace_path + '/metadata.json', 'r', encoding='utf-8') as file: + metadata = json.load(file) scripts = metadata['script'] fonts = [] for script in scripts: if script == 'Latn': fonts.append('Antiqua') if script == 'Goth': - fonts.append('Black Letter') + fonts.append('Fraktur') if script == 'Hebr': fonts.append('Hebrew') if script == 'Grek': diff --git a/workflows/execute_workflows.sh b/workflows/execute_workflows.sh index 0429460a9093a61e89ebf397133692ac7cb709da..5f3f59f70675edac6b98004ad885d0beb49b2bdf 100755 --- a/workflows/execute_workflows.sh +++ b/workflows/execute_workflows.sh @@ -5,19 +5,129 @@ WORKFLOW_DIR="$ROOT"/workflows OCRD_WORKFLOW_DIR="$WORKFLOW_DIR"/ocrd_workflows WORKSPACE_DIR="$WORKFLOW_DIR"/workspaces -if [[ -d workflows/workspaces ]]; then +set -euo pipefail + +clean_up_dirs() { + if [[ -d workflows/workspaces ]]; then rm -rf workflows/workspaces -fi + fi -if [[ -d workflows/nf-results ]]; then - rm -rf workflows/nf-results -fi + if [[ -d workflows/nf-results ]]; then + rm -rf workflows/nf-results + fi -if [[ -d workflows/results ]]; then - rm -rf workflows/results -fi + if [[ -d workflows/results ]]; then + rm -rf workflows/results + fi +} -set -euo pipefail +convert_ocrd_wfs_to_NextFlow() { + cd "$OCRD_WORKFLOW_DIR" || exit + + echo "Convert OCR-D workflows to NextFlow …" + + mkdir -p "$WORKFLOW_DIR/nf-results" + + for FILE in *.txt + do + oton convert -I "$FILE" -O "$FILE".nf + # the venv part is not needed since we execute this in an image derived from ocrd/all:maximum + sed -i 's/source "${params.venv_path}"//g' "$FILE".nf + sed -i 's/deactivate//g' "$FILE".nf + done +} + +download_models() { + echo "Download the necessary models if not available" + if [[ ! -f /usr/local/share/ocrd-resources/ocrd-tesserocr-recognize/ ]] + then + mkdir -p /usr/local/share/ocrd-resources/ + ocrd resmgr download ocrd-tesserocr-recognize '*' + fi + if [[ ! -d /usr/local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0 ]] + then + mkdir -p /usr/local/share/ocrd-resources/ + ocrd resmgr download ocrd-calamari-recognize qurator-gt4histocr-1.0 + fi +} + +create_wf_specific_workspaces() { + # execute this workflow on the existing data (incl. evaluation) + mkdir -p "$WORKSPACE_DIR"/tmp + cd "$WORKSPACE_DIR" || exit + + # create workspace for all OCR workflows. + # each workflow has a separate workspace to work with. + echo "Create workflow specific workspaces for each dir in ./gt …" + for DIR in "$ROOT"/gt/*/; do + DIR_NAME=$(basename "$DIR") + if grep -q "multivolume work" <<< "$(cat $DIR/mets.xml)"; then + echo "$DIR_NAME is a multivolume work" + + for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf + do + WF_NAME=$(basename -s .txt.nf "$WORKFLOW") + for SUB_WORK in $DIR/*/; do + SUB_WORK_DIR_NAME=$(basename "$SUB_WORK") + TARGET="$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$SUB_WORK_DIR_NAME"_"$WF_NAME" + cp -r "$ROOT"/gt/"$DIR_NAME"/"$SUB_WORK_DIR_NAME" "$TARGET" + if [[ -f "$ROOT"/gt/"$DIR_NAME"/metadata.json ]]; then + cp -r "$ROOT"/gt/"$DIR_NAME"/metadata.json "$TARGET"/metadata.json + fi + cp "$WORKFLOW" "$TARGET"/data/ + done + + done + else + for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf + do + WF_NAME=$(basename -s .txt.nf "$WORKFLOW") + cp -r "$ROOT"/gt/"$DIR_NAME" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME" + cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$DIR_NAME"_"$WF_NAME"/ + done + fi + done +} + +clean_up_tmp_dirs() { + echo "Clean up intermediate dirs …" + for DIR in "$WORKSPACE_DIR"/tmp/* + do + DIR_NAME=$(basename "$DIR") + mv "$DIR" "$WORKSPACE_DIR"/"$DIR_NAME" + cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME" + ls "$WORKSPACE_DIR"/"$DIR_NAME" + cp -r "$WORKSPACE_DIR"/"$DIR_NAME"/data/* "$WORKSPACE_DIR"/"$DIR_NAME"/ + done + + rm -rf "$WORKSPACE_DIR"/tmp + rm -rf "$WORKSPACE_DIR"/log.log +} + +execute_wfs_and_extract_benchmarks() { + mkdir -p "$ROOT"/workflows/results + # for all data sets… + for WS_DIR in "$WORKSPACE_DIR"/* + do + if [ -d "$WS_DIR" ]; then + echo "Switching to $WS_DIR." + + DIR_NAME=$(basename $WS_DIR) + + run "$WS_DIR"/*ocr.txt.nf "$DIR_NAME" "$WS_DIR" + run "$WS_DIR"/*eval.txt.nf "$DIR_NAME" "$WS_DIR" + + # create a result JSON according to the specs + echo "Get Benchmark JSON …" + quiver benchmarks-extraction "$WS_DIR" "$WORKFLOW" + echo "Done." + + # move data to results dir + mv "$WS_DIR"/*.json "$WORKFLOW_DIR"/results + fi + done + cd "$ROOT" || exit +} adjust_workflow_settings() { # $1: $WORKFLOW @@ -68,103 +178,29 @@ save_workspaces() { mv "$WORKSPACE_DIR"/"$2".zip "$WORKFLOW_DIR"/results/"$2"_"$WORKFLOW_NAME".zip } -cd "$OCRD_WORKFLOW_DIR" || exit - -echo "Convert OCR-D workflows to NextFlow …" - -mkdir -p "$WORKFLOW_DIR/nf-results" - -for FILE in *.txt -do - oton convert -I "$FILE" -O "$FILE".nf - # the venv part is not needed since we execute this in an image derived from ocrd/all:maximum - sed -i 's/source "${params.venv_path}"//g' "$FILE".nf - sed -i 's/deactivate//g' "$FILE".nf -done - -# download the necessary models if not available -echo "Download the necessary models if not available" -if [[ ! -f /usr/local/share/ocrd-resources/ocrd-tesserocr-recognize/ ]] -then - mkdir -p /usr/local/share/ocrd-resources/ - ocrd resmgr download ocrd-tesserocr-recognize '*' -fi -if [[ ! -d /usr/local/share/ocrd-resources/ocrd-calamari-recognize/qurator-gt4histocr-1.0 ]] -then - mkdir -p /usr/local/share/ocrd-resources/ - ocrd resmgr download ocrd-calamari-recognize qurator-gt4histocr-1.0 -fi - -# execute this workflow on the existing data (incl. evaluation) -mkdir -p "$WORKSPACE_DIR"/tmp -cd "$WORKSPACE_DIR" || exit - -# create workspace for all OCR workflows. -# each workflow has a separate workspace to work with. - -echo "Restore OCR-D workspaces from BagIts and create workflow specific workspaces …" -for BAGIT in "$ROOT"/submodules/quiver-data/*.zip -do - BAGIT_NAME=$(basename -s .ocrd.zip "$BAGIT") - ocrd zip spill "$BAGIT" -d "$WORKSPACE_DIR"/tmp > "$WORKSPACE_DIR"/log.log - unzip "$BAGIT" METADATA.yml -d "$WORKSPACE_DIR"/tmp/"$BAGIT_NAME" - - for WORKFLOW in "$OCRD_WORKFLOW_DIR"/*ocr.txt.nf - do - WF_NAME=$(basename -s .txt.nf "$WORKFLOW") - cp -r "$WORKSPACE_DIR"/tmp/"$BAGIT_NAME" "$WORKSPACE_DIR"/tmp/"$BAGIT_NAME"_"$WF_NAME" - cp "$WORKFLOW" "$WORKSPACE_DIR"/tmp/"$BAGIT_NAME"_"$WF_NAME"/ - done - rm -r "$WORKSPACE_DIR"/tmp/"$BAGIT_NAME" -done - -echo "Clean up intermediate dirs …" -for DIR in "$WORKSPACE_DIR"/tmp/* -do - DIR_NAME=$(basename "$DIR") - mv "$DIR" "$WORKSPACE_DIR"/"$DIR_NAME" - cp "$OCRD_WORKFLOW_DIR"/*eval.txt.nf "$WORKSPACE_DIR"/"$DIR_NAME" -done - -rm -rf "$WORKSPACE_DIR"/tmp -rm -rf "$WORKSPACE_DIR"/log.log - -# start webserver for evaluation -uvicorn api:app --app-dir "$ROOT"/src & - -mkdir -p "$ROOT"/workflows/results - -# for all data sets… -for WS_DIR in "$WORKSPACE_DIR"/* -do - if [ -d "$WS_DIR" ]; then - echo "Switching to $WS_DIR." - - DIR_NAME=$(basename $WS_DIR) - - run "$WS_DIR"/*ocr.txt.nf "$DIR_NAME" "$WS_DIR" - run "$WS_DIR"/*eval.txt.nf "$DIR_NAME" "$WS_DIR" - - # create a result JSON according to the specs - echo "Get Benchmark JSON …" - quiver benchmarks-extraction "$WS_DIR" "$WORKFLOW" - echo "Done." - - # move data to results dir - mv "$WS_DIR"/*.json "$WORKFLOW_DIR"/results - fi -done +summarize_to_data_json() { + # summarize JSONs + echo "Summarize JSONs to one file …" + quiver summarize-benchmarks + echo "Done." +} -cd "$ROOT" || exit -# summarize JSONs -echo "Summarize JSONs to one file …" -quiver summarize-benchmarks -echo "Done." +final_clean_up() { + echo "Cleaning up …" + rm -rf "$WORKSPACE_DIR" + rm -rf "$ROOT"/work + rm -rf "$WORKFLOW_DIR"/nf-results + rm -rf "$WORKFLOW_DIR"/results + rm "$WORKFLOW_DIR"/ocrd_workflows/*.nf +} -echo "Cleaning up …" -rm -rf "$WORKSPACE_DIR" -rm -rf "$ROOT"/work -rm -rf "$WORKFLOW_DIR"/nf-results -rm -rf "$WORKFLOW_DIR"/results -rm "$WORKFLOW_DIR"/ocrd_workflows/*.nf +clean_up_dirs +convert_ocrd_wfs_to_NextFlow +download_models +create_wf_specific_workspaces +clean_up_tmp_dirs +uvicorn api:app --app-dir "$ROOT"/src & # start webserver for evaluation +execute_wfs_and_extract_benchmarks +summarize_to_data_json +final_clean_up