diff --git a/data/workflows.json b/data/workflows.json index bbc48ae81c1c2ddc97415a98015c2078a4047e0a..3440a0af2fe3a3f3907194da3cd7bbdf1005861a 100644 --- a/data/workflows.json +++ b/data/workflows.json @@ -1,228 +1,345 @@ [ { - "eval_workflow_id": "wf-datareichsanzeiger_tables-eval", - "label": "Workflow on data reichsanzeiger_tables", + "eval_workflow_id": "wf-datablumenbach_anatomie_1805-eval", + "label": "Workflow on data blumenbach_anatomie_1805", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_tables.ocrd.zip", - "label": "GT workspace reichsanzeiger_tables" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/blumenbach_anatomie_1805.ocrd.zip", + "label": "GT workspace blumenbach_anatomie_1805" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_ocr.zip", - "label": "OCR workspace for reichsanzeiger_tables" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_ocr.zip", + "label": "OCR workspace for blumenbach_anatomie_1805" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_evaluation.zip", - "label": "Evaluation workspace for reichsanzeiger_tables" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_evaluation.zip", + "label": "Evaluation workspace for blumenbach_anatomie_1805" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Fraktur" ], - "publication_century": "1820-1939", + "publication_century": "1800-1900", "publication_decade": "", "publication_year": "19th century", - "number_of_pages": 5, - "layout": "reichsanzeiger-gt" + "number_of_pages": 3, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 530.564159, - "cpu_time": 724.869668, - "cer_mean": 1.479972651687655, - "cer_median": 1.331401349741961, + "wall_time": 25.104598, + "cpu_time": 24.163375, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 1.090097148229395, - 1.9778823058446757 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.3854879135899152, - "wer": 1.7910299442503619, - "pages_per_minute": 0.5654358571175178 + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 7.170001288210232 }, "by_page": [ { - "page_id": "P_1877_7_0059", - "cer": 1.090097148229395, - "wer": 1.4218399401645474 - }, - { - "page_id": "P_1883_55_0044", - "cer": 1.2073319135990264, - "wer": 1.4705882352941178 - }, - { - "page_id": "P_1929_250_0019", - "cer": 1.331401349741961, - "wer": 1.7407221664994985 + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "P_1932_300_0488", - "cer": 1.9778823058446757, - "wer": 2.349493487698987 + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "P_1936_123_0292", - "cer": 1.7931505410232167, - "wer": 1.9725058915946583 + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-datasilesius_seelenlust01_1657-eval", - "label": "Workflow on data silesius_seelenlust01_1657", + "eval_workflow_id": "wf-databallenstedt_delatio_1777-eval", + "label": "Workflow on data ballenstedt_delatio_1777", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/silesius_seelenlust01_1657.ocrd.zip", - "label": "GT workspace silesius_seelenlust01_1657" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/ballenstedt_delatio_1777.ocrd.zip", + "label": "GT workspace ballenstedt_delatio_1777" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_ocr.zip", - "label": "OCR workspace for silesius_seelenlust01_1657" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_ocr.zip", + "label": "OCR workspace for ballenstedt_delatio_1777" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_evaluation.zip", - "label": "Evaluation workspace for silesius_seelenlust01_1657" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_evaluation.zip", + "label": "Evaluation workspace for ballenstedt_delatio_1777" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 5, - "layout": "complex" + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 13.583684, - "cpu_time": 17.039027, - "cer_mean": 0.3007055286105995, - "cer_median": 0.2951219512195122, + "wall_time": 31.663677000000007, + "cpu_time": 30.643176, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 0.19271623672230653, - 0.44970414201183434 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.10657123719012947, - "wer": 0.5174305966287508, - "pages_per_minute": 22.08531941703002 + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 5.6847472262933945 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.44970414201183434, - "wer": 0.7101449275362319 - }, - { - "page_id": "phys_0002", - "cer": 0.20913884007029876, - "wer": 0.5104166666666666 - }, - { - "page_id": "phys_0003", - "cer": 0.2951219512195122, - "wer": 0.4647887323943662 + "page_id": "phys_00003", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_0004", - "cer": 0.19271623672230653, - "wer": 0.3870967741935484 + "page_id": "phys_00005", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_0005", - "cer": 0.35684647302904565, - "wer": 0.5147058823529411 + "page_id": "phys_00010", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-databenner_herrnhuterey04_1748-eval", - "label": "Workflow on data benner_herrnhuterey04_1748", + "eval_workflow_id": "wf-datahuebner_handbuch_1696-eval", + "label": "Workflow on data huebner_handbuch_1696", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -233,16 +350,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/benner_herrnhuterey04_1748.ocrd.zip", - "label": "GT workspace benner_herrnhuterey04_1748" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/huebner_handbuch_1696.ocrd.zip", + "label": "GT workspace huebner_handbuch_1696" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_ocr.zip", - "label": "OCR workspace for benner_herrnhuterey04_1748" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_ocr.zip", + "label": "OCR workspace for huebner_handbuch_1696" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_evaluation.zip", - "label": "Evaluation workspace for benner_herrnhuterey04_1748" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_evaluation.zip", + "label": "Evaluation workspace for huebner_handbuch_1696" }, "workflow_steps": [ { @@ -280,90 +397,154 @@ "Antiqua", "Fraktur" ], - "publication_century": "1700-1800", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, + "publication_year": "17th century", + "number_of_pages": 3, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 19.971749, - "cpu_time": 23.245253, - "cer_mean": 0.20433470587543373, - "cer_median": 0.1700173533179325, + "wall_time": 14.267812, + "cpu_time": 16.333578, + "cer_mean": 0.20627969569747484, + "cer_median": 0.08702290076335878, "cer_range": [ - 0.04063745019920319, - 0.43666666666666665 + 0.08487084870848709, + 0.44694533762057875 ], - "cer_standard_deviation": 0.19097881579976753, - "wer": 0.31449770246107817, - "pages_per_minute": 12.016974577439363 + "cer_standard_deviation": 0.20842533731220508, + "wer": 0.3271033769383102, + "pages_per_minute": 12.615809627993416 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.43666666666666665, - "wer": 0.583941605839416 + "cer": 0.44694533762057875, + "wer": 0.6341463414634146 }, { "page_id": "phys_0002", - "cer": 0.04063745019920319, - "wer": 0.10344827586206896 + "cer": 0.08702290076335878, + "wer": 0.1504424778761062 }, { "page_id": "phys_0003", - "cer": 0.05536912751677853, - "wer": 0.18435754189944134 - }, - { - "page_id": "phys_0004", - "cer": 0.28466557911908646, - "wer": 0.3862433862433862 + "cer": 0.08487084870848709, + "wer": 0.19672131147540983 } ] } }, { - "eval_workflow_id": "wf-databohse_helicon_1696-eval", - "label": "Workflow on data bohse_helicon_1696", + "eval_workflow_id": "wf-datareichsanzeiger_title_pages-eval", + "label": "Workflow on data reichsanzeiger_title_pages", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/bohse_helicon_1696.ocrd.zip", - "label": "GT workspace bohse_helicon_1696" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_title_pages.ocrd.zip", + "label": "GT workspace reichsanzeiger_title_pages" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_ocr.zip", - "label": "OCR workspace for bohse_helicon_1696" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_ocr.zip", + "label": "OCR workspace for reichsanzeiger_title_pages" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_evaluation.zip", - "label": "Evaluation workspace for bohse_helicon_1696" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_title_pages" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -383,194 +564,335 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Fraktur" + "Antiqua" ], - "publication_century": "1600-1700", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "17th century", + "publication_year": "19th century", "number_of_pages": 5, - "layout": "simple" + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 16.040643, - "cpu_time": 19.265802, - "cer_mean": 0.40403175304113403, - "cer_median": 0.45517241379310347, + "wall_time": 2039.5017899999998, + "cpu_time": 3039.7441000000003, + "cer_mean": 0.503371617521129, + "cer_median": 0.413282477002468, "cer_range": [ - 0.1955040871934605, - 0.5079365079365079 + 0.08001625727131861, + 1.0 ], - "cer_standard_deviation": 0.12207840266111031, - "wer": 0.5356334009868995, - "pages_per_minute": 18.70249216318822 + "cer_standard_deviation": 0.3496032851993558, + "wer": 0.6522771332007985, + "pages_per_minute": 0.14709474709507367 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.5079365079365079, - "wer": 0.6382978723404256 + "page_id": "P_1881_115_0163", + "cer": 0.413282477002468, + "wer": 0.5798611111111112 }, { - "page_id": "phys_0002", - "cer": 0.40540540540540543, - "wer": 0.5740740740740741 + "page_id": "P_1885_5_0054", + "cer": 0.08001625727131861, + "wer": 0.22626162018592297 }, { - "page_id": "phys_0003", - "cer": 0.45517241379310347, - "wer": 0.5467625899280576 + "page_id": "P_1887_134_0444", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_0004", - "cer": 0.45614035087719296, - "wer": 0.6018518518518519 + "page_id": "P_1916_169_0087", + "cer": 0.6772530470255524, + "wer": 0.8682910206168002 }, { - "page_id": "phys_0005", - "cer": 0.1955040871934605, - "wer": 0.31718061674008813 + "page_id": "P_1918_267_0129", + "cer": 0.3463063063063063, + "wer": 0.5869719140901581 } ] } }, { - "eval_workflow_id": "wf-databuerger_gedichte_1778-eval", - "label": "Workflow on data buerger_gedichte_1778", + "eval_workflow_id": "wf-dataarnimb_goethe03_1835-eval", + "label": "Workflow on data arnimb_goethe03_1835", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/buerger_gedichte_1778.ocrd.zip", - "label": "GT workspace buerger_gedichte_1778" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/arnimb_goethe03_1835.ocrd.zip", + "label": "GT workspace arnimb_goethe03_1835" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_ocr.zip", - "label": "OCR workspace for buerger_gedichte_1778" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_ocr.zip", + "label": "OCR workspace for arnimb_goethe03_1835" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_evaluation.zip", - "label": "Evaluation workspace for buerger_gedichte_1778" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_evaluation.zip", + "label": "Evaluation workspace for arnimb_goethe03_1835" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", "Fraktur" ], - "publication_century": "1700-1800", + "publication_century": "1800-1900", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 2, - "layout": "complex" + "publication_year": "19th century", + "number_of_pages": 1, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 5.921714, - "cpu_time": 7.016172, - "cer_mean": 0.10672693293515115, - "cer_median": 0.10672693293515115, + "wall_time": 6.831029999999999, + "cpu_time": 6.5121720000000005, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 0.04484304932735426, - 0.16861081654294804 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.08751702749046443, - "wer": 0.23135901859306116, - "pages_per_minute": 20.26440317786371 + "cer_standard_deviation": null, + "wer": 1.0, + "pages_per_minute": 8.783448469703691 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.16861081654294804, - "wer": 0.3546099290780142 - }, - { - "page_id": "phys_0002", - "cer": 0.04484304932735426, - "wer": 0.10810810810810811 + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-datarollenhagen_reysen_1603-eval", - "label": "Workflow on data rollenhagen_reysen_1603", + "eval_workflow_id": "wf-datann_besuch_1780-eval", + "label": "Workflow on data nn_besuch_1780", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/rollenhagen_reysen_1603.ocrd.zip", - "label": "GT workspace rollenhagen_reysen_1603" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/nn_besuch_1780.ocrd.zip", + "label": "GT workspace nn_besuch_1780" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_ocr.zip", - "label": "OCR workspace for rollenhagen_reysen_1603" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_ocr.zip", + "label": "OCR workspace for nn_besuch_1780" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_evaluation.zip", - "label": "Evaluation workspace for rollenhagen_reysen_1603" - }, + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_evaluation.zip", + "label": "Evaluation workspace for nn_besuch_1780" + }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -593,85 +915,170 @@ "Antiqua", "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 3, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 16.777821, - "cpu_time": 19.16547, - "cer_mean": 0.21046220070684576, - "cer_median": 0.16929133858267717, + "wall_time": 42.463463, + "cpu_time": 61.604054, + "cer_mean": 0.07900170040044713, + "cer_median": 0.018462174141687326, "cer_range": [ - 0.14512471655328799, - 0.3169705469845722 + 0.013259668508287293, + 0.26582278481012656 ], - "cer_standard_deviation": 0.093027024434784, - "wer": 0.3190752126565147, - "pages_per_minute": 10.728449183001775 + "cer_standard_deviation": 0.1245848990441196, + "wer": 0.13667696498571524, + "pages_per_minute": 5.651917743967326 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.3169705469845722, - "wer": 0.4649122807017544 + "page_id": "phys_00001", + "cer": 0.26582278481012656, + "wer": 0.3620689655172414 }, { - "page_id": "phys_0002", - "cer": 0.16929133858267717, - "wer": 0.28104575163398693 + "page_id": "phys_00002", + "cer": 0.013259668508287293, + "wer": 0.057803468208092484 }, { - "page_id": "phys_0003", - "cer": 0.14512471655328799, - "wer": 0.2112676056338028 + "page_id": "phys_00003", + "cer": 0.020697167755991286, + "wer": 0.06936416184971098 + }, + { + "page_id": "phys_00004", + "cer": 0.016227180527383367, + "wer": 0.05747126436781609 } ] } }, { - "eval_workflow_id": "wf-dataeuler_rechenkunst01_1738-eval", - "label": "Workflow on data euler_rechenkunst01_1738", + "eval_workflow_id": "wf-databernd_lebensbeschreibung_1738-eval", + "label": "Workflow on data bernd_lebensbeschreibung_1738", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/euler_rechenkunst01_1738.ocrd.zip", - "label": "GT workspace euler_rechenkunst01_1738" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bernd_lebensbeschreibung_1738.ocrd.zip", + "label": "GT workspace bernd_lebensbeschreibung_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_ocr.zip", - "label": "OCR workspace for euler_rechenkunst01_1738" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_ocr.zip", + "label": "OCR workspace for bernd_lebensbeschreibung_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_evaluation.zip", - "label": "Evaluation workspace for euler_rechenkunst01_1738" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_evaluation.zip", + "label": "Evaluation workspace for bernd_lebensbeschreibung_1738" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -697,97 +1104,162 @@ "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 6, + "number_of_pages": 3, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 18.514995, - "cpu_time": 23.577633, - "cer_mean": 0.25835849983393794, - "cer_median": 0.230420483908856, + "wall_time": 30.325404000000006, + "cpu_time": 46.848454, + "cer_mean": 0.08856691777926955, + "cer_median": 0.03746177370030581, "cer_range": [ - 0.08586296617519515, - 0.5747368421052632 + 0.0290519877675841, + 0.1991869918699187 ], - "cer_standard_deviation": 0.16915619139008603, - "wer": 0.37827288927088376, - "pages_per_minute": 19.443699552713895 + "cer_standard_deviation": 0.09589203157053293, + "wer": 0.14344277534022457, + "pages_per_minute": 5.935617543627777 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.08586296617519515, - "wer": 0.10465116279069768 + "cer": 0.1991869918699187, + "wer": 0.27941176470588236 }, { "page_id": "phys_0002", - "cer": 0.5747368421052632, - "wer": 0.7987804878048781 + "cer": 0.0290519877675841, + "wer": 0.05357142857142857 }, { "page_id": "phys_0003", - "cer": 0.2767102229054573, - "wer": 0.3786407766990291 - }, - { - "page_id": "phys_0004", - "cer": 0.22828282828282828, - "wer": 0.27741935483870966 - }, - { - "page_id": "phys_0005", - "cer": 0.152, - "wer": 0.27218934911242604 - }, - { - "page_id": "phys_0006", - "cer": 0.23255813953488372, - "wer": 0.43795620437956206 + "cer": 0.03746177370030581, + "wer": 0.09734513274336283 } ] } }, { - "eval_workflow_id": "wf-datakistler_kraeuter_1500-eval", - "label": "Workflow on data kistler_kraeuter_1500", + "eval_workflow_id": "wf-dataluther_auszlegunge_1520-eval", + "label": "Workflow on data luther_auszlegunge_1520", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/kistler_kraeuter_1500.ocrd.zip", - "label": "GT workspace kistler_kraeuter_1500" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luther_auszlegunge_1520.ocrd.zip", + "label": "GT workspace luther_auszlegunge_1520" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_ocr.zip", - "label": "OCR workspace for kistler_kraeuter_1500" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_ocr.zip", + "label": "OCR workspace for luther_auszlegunge_1520" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_evaluation.zip", - "label": "Evaluation workspace for kistler_kraeuter_1500" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_evaluation.zip", + "label": "Evaluation workspace for luther_auszlegunge_1520" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -820,35 +1292,35 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 7.833456, - "cpu_time": 9.538595, - "cer_mean": 0.1809783660358373, - "cer_median": 0.1809783660358373, + "wall_time": 27.237084000000003, + "cpu_time": 36.259029, + "cer_mean": 0.5161379724470825, + "cer_median": 0.5161379724470825, "cer_range": [ - 0.13479623824451412, - 0.2271604938271605 + 0.0754414125200642, + 0.9568345323741008 ], - "cer_standard_deviation": 0.06531139146173669, - "wer": 0.408373786407767, - "pages_per_minute": 15.318909048573197 + "cer_standard_deviation": 0.6232390519399567, + "wer": 0.6192780337941628, + "pages_per_minute": 4.405757973210347 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.2271604938271605, - "wer": 0.441747572815534 + "page_id": "phys_0003", + "cer": 0.9568345323741008, + "wer": 0.9666666666666667 }, { - "page_id": "phys_0021", - "cer": 0.13479623824451412, - "wer": 0.375 + "page_id": "phys_0029", + "cer": 0.0754414125200642, + "wer": 0.271889400921659 } ] } }, { - "eval_workflow_id": "wf-datacalvi_beutelschneider01_1627-eval", - "label": "Workflow on data calvi_beutelschneider01_1627", + "eval_workflow_id": "wf-databohse_helicon_1696-eval", + "label": "Workflow on data bohse_helicon_1696", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -859,16 +1331,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/calvi_beutelschneider01_1627.ocrd.zip", - "label": "GT workspace calvi_beutelschneider01_1627" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bohse_helicon_1696.ocrd.zip", + "label": "GT workspace bohse_helicon_1696" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_ocr.zip", - "label": "OCR workspace for calvi_beutelschneider01_1627" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_ocr.zip", + "label": "OCR workspace for bohse_helicon_1696" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_evaluation.zip", - "label": "Evaluation workspace for calvi_beutelschneider01_1627" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_evaluation.zip", + "label": "Evaluation workspace for bohse_helicon_1696" }, "workflow_steps": [ { @@ -909,82 +1381,172 @@ "publication_century": "1600-1700", "publication_decade": "", "publication_year": "17th century", - "number_of_pages": 3, + "number_of_pages": 5, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.37275, - "cpu_time": 9.77562, - "cer_mean": 0.08254540344028655, - "cer_median": 0.09331797235023041, + "wall_time": 15.131289, + "cpu_time": 18.178699, + "cer_mean": 0.40403175304113403, + "cer_median": 0.45517241379310347, "cer_range": [ - 0.05025996533795494, - 0.1040582726326743 + 0.1955040871934605, + 0.5079365079365079 ], - "cer_standard_deviation": 0.02847104928930229, - "wer": 0.1821206964037463, - "pages_per_minute": 24.41422806958055 + "cer_standard_deviation": 0.12207840266111031, + "wer": 0.5356334009868995, + "pages_per_minute": 19.826466866107705 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.1040582726326743, - "wer": 0.21656050955414013 + "cer": 0.5079365079365079, + "wer": 0.6382978723404256 }, { "page_id": "phys_0002", - "cer": 0.09331797235023041, - "wer": 0.20689655172413793 + "cer": 0.40540540540540543, + "wer": 0.5740740740740741 }, { "page_id": "phys_0003", - "cer": 0.05025996533795494, - "wer": 0.12290502793296089 + "cer": 0.45517241379310347, + "wer": 0.5467625899280576 + }, + { + "page_id": "phys_0004", + "cer": 0.45614035087719296, + "wer": 0.6018518518518519 + }, + { + "page_id": "phys_0005", + "cer": 0.1955040871934605, + "wer": 0.31718061674008813 } ] } }, { - "eval_workflow_id": "wf-dataarnimb_goethe03_1835-eval", - "label": "Workflow on data arnimb_goethe03_1835", + "eval_workflow_id": "wf-datacalvi_beutelschneider01_1627-eval", + "label": "Workflow on data calvi_beutelschneider01_1627", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/arnimb_goethe03_1835.ocrd.zip", - "label": "GT workspace arnimb_goethe03_1835" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/calvi_beutelschneider01_1627.ocrd.zip", + "label": "GT workspace calvi_beutelschneider01_1627" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_ocr.zip", - "label": "OCR workspace for arnimb_goethe03_1835" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_ocr.zip", + "label": "OCR workspace for calvi_beutelschneider01_1627" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_evaluation.zip", - "label": "Evaluation workspace for arnimb_goethe03_1835" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_evaluation.zip", + "label": "Evaluation workspace for calvi_beutelschneider01_1627" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -1004,143 +1566,217 @@ "document_metadata": { "data_properties": { "fonts": [ + "Antiqua", "Fraktur" ], - "publication_century": "1800-1900", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 1, + "publication_year": "17th century", + "number_of_pages": 3, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 4.707975, - "cpu_time": 5.426059, - "cer_mean": 0.004721435316336166, - "cer_median": 0.004721435316336166, + "wall_time": 37.645064000000005, + "cpu_time": 55.114228999999995, + "cer_mean": 0.15501219595245352, + "cer_median": 0.11654526534859522, "cer_range": [ - 0.004721435316336166, - 0.004721435316336166 + 0.09618717504332755, + 0.2523041474654378 ], - "cer_standard_deviation": null, - "wer": 0.015873015873015872, - "pages_per_minute": 12.744332754528221 + "cer_standard_deviation": 0.08486993479509113, + "wer": 0.360948415946103, + "pages_per_minute": 4.781503359909283 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.004721435316336166, - "wer": 0.015873015873015872 + "cer": 0.11654526534859522, + "wer": 0.46496815286624205 + }, + { + "page_id": "phys_0002", + "cer": 0.2523041474654378, + "wer": 0.4 + }, + { + "page_id": "phys_0003", + "cer": 0.09618717504332755, + "wer": 0.21787709497206703 } ] } }, { - "eval_workflow_id": "wf-datablumenbach_anatomie_1805-eval", - "label": "Workflow on data blumenbach_anatomie_1805", + "eval_workflow_id": "wf-datakistler_kraeuter_1500-eval", + "label": "Workflow on data kistler_kraeuter_1500", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/blumenbach_anatomie_1805.ocrd.zip", - "label": "GT workspace blumenbach_anatomie_1805" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/kistler_kraeuter_1500.ocrd.zip", + "label": "GT workspace kistler_kraeuter_1500" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_ocr.zip", - "label": "OCR workspace for blumenbach_anatomie_1805" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_ocr.zip", + "label": "OCR workspace for kistler_kraeuter_1500" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_evaluation.zip", - "label": "Evaluation workspace for blumenbach_anatomie_1805" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_evaluation.zip", + "label": "Evaluation workspace for kistler_kraeuter_1500" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", "document_metadata": { "data_properties": { "fonts": [ "Antiqua", "Fraktur" ], - "publication_century": "1800-1900", + "publication_century": "1500-1600", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 3, + "publication_year": "16th century", + "number_of_pages": 2, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 13.404813, - "cpu_time": 15.520204, - "cer_mean": 0.08328200324172261, - "cer_median": 0.08736842105263158, + "wall_time": 14.867348999999999, + "cpu_time": 14.321257, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 0.04055496264674493, - 0.12192262602579132 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.04083746158658049, - "wer": 0.23519468186134854, - "pages_per_minute": 13.428012759297722 + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 8.071378427990089 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.08736842105263158, - "wer": 0.22666666666666666 - }, - { - "page_id": "phys_0002", - "cer": 0.04055496264674493, - "wer": 0.14814814814814814 + "page_id": "phys_0007", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_0003", - "cer": 0.12192262602579132, - "wer": 0.33076923076923076 + "page_id": "phys_0021", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-datareichsanzeiger_title_pages-eval", - "label": "Workflow on data reichsanzeiger_title_pages", + "eval_workflow_id": "wf-dataeuler_rechenkunst01_1738-eval", + "label": "Workflow on data euler_rechenkunst01_1738", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1151,16 +1787,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_title_pages.ocrd.zip", - "label": "GT workspace reichsanzeiger_title_pages" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/euler_rechenkunst01_1738.ocrd.zip", + "label": "GT workspace euler_rechenkunst01_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_ocr.zip", - "label": "OCR workspace for reichsanzeiger_title_pages" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_ocr.zip", + "label": "OCR workspace for euler_rechenkunst01_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_evaluation.zip", - "label": "Evaluation workspace for reichsanzeiger_title_pages" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_evaluation.zip", + "label": "Evaluation workspace for euler_rechenkunst01_1738" }, "workflow_steps": [ { @@ -1195,97 +1831,183 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Fraktur" ], - "publication_century": "1820-1939", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 5, - "layout": "reichsanzeiger-gt" + "publication_year": "18th century", + "number_of_pages": 6, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 470.537927, - "cpu_time": 603.540066, - "cer_mean": 0.5074594248191785, - "cer_median": 0.3389393598117977, + "wall_time": 16.461667, + "cpu_time": 21.693217, + "cer_mean": 0.25835849983393794, + "cer_median": 0.230420483908856, "cer_range": [ - 0.09154875911296263, - 1.0 + 0.08586296617519515, + 0.5747368421052632 ], - "cer_standard_deviation": 0.36783069273412317, - "wer": 0.630009265947987, - "pages_per_minute": 0.6375681593037663 + "cer_standard_deviation": 0.16915619139008603, + "wer": 0.37827288927088376, + "pages_per_minute": 21.86898811645261 }, "by_page": [ { - "page_id": "P_1881_115_0163", - "cer": 0.3377672361293631, - "wer": 0.4791666666666667 + "page_id": "phys_0001", + "cer": 0.08586296617519515, + "wer": 0.10465116279069768 }, { - "page_id": "P_1885_5_0054", - "cer": 0.09154875911296263, - "wer": 0.20966135458167331 + "page_id": "phys_0002", + "cer": 0.5747368421052632, + "wer": 0.7987804878048781 }, { - "page_id": "P_1887_134_0444", - "cer": 1.0, - "wer": 1.0 + "page_id": "phys_0003", + "cer": 0.2767102229054573, + "wer": 0.3786407766990291 }, { - "page_id": "P_1916_169_0087", - "cer": 0.3389393598117977, - "wer": 0.5072414380644062 + "page_id": "phys_0004", + "cer": 0.22828282828282828, + "wer": 0.27741935483870966 }, { - "page_id": "P_1918_267_0129", - "cer": 0.769041769041769, - "wer": 0.953976870427189 + "page_id": "phys_0005", + "cer": 0.152, + "wer": 0.27218934911242604 + }, + { + "page_id": "phys_0006", + "cer": 0.23255813953488372, + "wer": 0.43795620437956206 } ] } }, { - "eval_workflow_id": "wf-dataluther_auszlegunge_1520-eval", - "label": "Workflow on data luther_auszlegunge_1520", + "eval_workflow_id": "wf-databenner_herrnhuterey04_1748-eval", + "label": "Workflow on data benner_herrnhuterey04_1748", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/luther_auszlegunge_1520.ocrd.zip", - "label": "GT workspace luther_auszlegunge_1520" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/benner_herrnhuterey04_1748.ocrd.zip", + "label": "GT workspace benner_herrnhuterey04_1748" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_ocr.zip", - "label": "OCR workspace for luther_auszlegunge_1520" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_ocr.zip", + "label": "OCR workspace for benner_herrnhuterey04_1748" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_evaluation.zip", - "label": "Evaluation workspace for luther_auszlegunge_1520" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_evaluation.zip", + "label": "Evaluation workspace for benner_herrnhuterey04_1748" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -1308,45 +2030,55 @@ "Antiqua", "Fraktur" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 10.160861, - "cpu_time": 11.973182, - "cer_mean": 0.24855845660550213, - "cer_median": 0.24855845660550213, + "wall_time": 58.189493, + "cpu_time": 87.823723, + "cer_mean": 0.16726583056278752, + "cer_median": 0.09637318392327315, "cer_range": [ - 0.07865168539325842, - 0.4184652278177458 + 0.03187250996015936, + 0.4444444444444444 ], - "cer_standard_deviation": 0.2402844601873776, - "wer": 0.37300307219662054, - "pages_per_minute": 11.81002279235982 + "cer_standard_deviation": 0.18889822286887584, + "wer": 0.28912998545359864, + "pages_per_minute": 4.12445593915039 }, "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4444444444444444, + "wer": 0.6204379562043796 + }, + { + "page_id": "phys_0002", + "cer": 0.03187250996015936, + "wer": 0.10919540229885058 + }, { "page_id": "phys_0003", - "cer": 0.4184652278177458, - "wer": 0.48333333333333334 + "cer": 0.1266778523489933, + "wer": 0.2681564245810056 }, { - "page_id": "phys_0029", - "cer": 0.07865168539325842, - "wer": 0.2626728110599078 + "page_id": "phys_0004", + "cer": 0.06606851549755302, + "wer": 0.15873015873015872 } ] } }, { - "eval_workflow_id": "wf-datahuebner_handbuch_1696-eval", - "label": "Workflow on data huebner_handbuch_1696", + "eval_workflow_id": "wf-databernd_lebensbeschreibung_1738-eval", + "label": "Workflow on data bernd_lebensbeschreibung_1738", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1357,16 +2089,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/huebner_handbuch_1696.ocrd.zip", - "label": "GT workspace huebner_handbuch_1696" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bernd_lebensbeschreibung_1738.ocrd.zip", + "label": "GT workspace bernd_lebensbeschreibung_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_ocr.zip", - "label": "OCR workspace for huebner_handbuch_1696" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_ocr.zip", + "label": "OCR workspace for bernd_lebensbeschreibung_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_evaluation.zip", - "label": "Evaluation workspace for huebner_handbuch_1696" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_evaluation.zip", + "label": "Evaluation workspace for bernd_lebensbeschreibung_1738" }, "workflow_steps": [ { @@ -1404,9 +2136,9 @@ "Antiqua", "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", + "publication_year": "18th century", "number_of_pages": 3, "layout": "complex" } @@ -1414,146 +2146,207 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 15.981865, - "cpu_time": 18.106327, - "cer_mean": 0.20627969569747484, - "cer_median": 0.08702290076335878, + "wall_time": 7.753105, + "cpu_time": 9.89478, + "cer_mean": 0.1790109644215708, + "cer_median": 0.05504587155963303, "cer_range": [ - 0.08487084870848709, - 0.44694533762057875 + 0.008409785932721712, + 0.4735772357723577 ], - "cer_standard_deviation": 0.20842533731220508, - "wer": 0.3271033769383102, - "pages_per_minute": 11.262765640931143 + "cer_standard_deviation": 0.2561653709691831, + "wer": 0.2391410103864555, + "pages_per_minute": 23.216504871274154 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.44694533762057875, - "wer": 0.6341463414634146 + "cer": 0.4735772357723577, + "wer": 0.5882352941176471 }, { "page_id": "phys_0002", - "cer": 0.08702290076335878, - "wer": 0.1504424778761062 + "cer": 0.05504587155963303, + "wer": 0.09821428571428571 }, { "page_id": "phys_0003", - "cer": 0.08487084870848709, - "wer": 0.19672131147540983 + "cer": 0.008409785932721712, + "wer": 0.030973451327433628 } ] } }, { - "eval_workflow_id": "wf-datann_besuch_1780-eval", - "label": "Workflow on data nn_besuch_1780", + "eval_workflow_id": "wf-datareichsanzeiger_tables-eval", + "label": "Workflow on data reichsanzeiger_tables", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/nn_besuch_1780.ocrd.zip", - "label": "GT workspace nn_besuch_1780" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_tables.ocrd.zip", + "label": "GT workspace reichsanzeiger_tables" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_ocr.zip", - "label": "OCR workspace for nn_besuch_1780" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_ocr.zip", + "label": "OCR workspace for reichsanzeiger_tables" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_evaluation.zip", - "label": "Evaluation workspace for nn_besuch_1780" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_tables" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Fraktur" - ], - "publication_century": "1700-1800", + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 14.316583, - "cpu_time": 16.701302, - "cer_mean": 0.06315407734401027, - "cer_median": 0.026726016076928857, + "wall_time": 1761.044629, + "cpu_time": 1719.1173, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 0.01878453038674033, - 0.18037974683544303 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.07825196427362012, - "wer": 0.11662846322503488, - "pages_per_minute": 16.76377666374721 + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 0.17035343401282987 }, "by_page": [ { - "page_id": "phys_00001", - "cer": 0.18037974683544303, - "wer": 0.1896551724137931 + "page_id": "P_1877_7_0059", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_00002", - "cer": 0.01878453038674033, - "wer": 0.08670520231213873 + "page_id": "P_1883_55_0044", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_00003", - "cer": 0.02505446623093682, - "wer": 0.08670520231213873 + "page_id": "P_1929_250_0019", + "cer": 1.0, + "wer": 1.0 }, { - "page_id": "phys_00004", - "cer": 0.028397565922920892, - "wer": 0.10344827586206896 + "page_id": "P_1932_300_0488", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-databallenstedt_delatio_1777-eval", - "label": "Workflow on data ballenstedt_delatio_1777", + "eval_workflow_id": "wf-datacalvi_beutelschneider01_1627-eval", + "label": "Workflow on data calvi_beutelschneider01_1627", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1564,16 +2357,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/ballenstedt_delatio_1777.ocrd.zip", - "label": "GT workspace ballenstedt_delatio_1777" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/calvi_beutelschneider01_1627.ocrd.zip", + "label": "GT workspace calvi_beutelschneider01_1627" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_ocr.zip", - "label": "OCR workspace for ballenstedt_delatio_1777" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_ocr.zip", + "label": "OCR workspace for calvi_beutelschneider01_1627" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_evaluation.zip", - "label": "Evaluation workspace for ballenstedt_delatio_1777" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_evaluation.zip", + "label": "Evaluation workspace for calvi_beutelschneider01_1627" }, "workflow_steps": [ { @@ -1611,9 +2404,9 @@ "Antiqua", "Fraktur" ], - "publication_century": "1700-1800", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "18th century", + "publication_year": "17th century", "number_of_pages": 3, "layout": "simple" } @@ -1621,75 +2414,155 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 21.444519, - "cpu_time": 24.028484, - "cer_mean": 0.06438361522903834, - "cer_median": 0.03969957081545064, + "wall_time": 6.47648, + "cpu_time": 8.9799, + "cer_mean": 0.08254540344028655, + "cer_median": 0.09331797235023041, "cer_range": [ - 0.021764032073310423, - 0.13168724279835392 + 0.05025996533795494, + 0.1040582726326743 ], - "cer_standard_deviation": 0.058972490200809365, - "wer": 0.16906902212925057, - "pages_per_minute": 8.393753200992757 + "cer_standard_deviation": 0.02847104928930229, + "wer": 0.1821206964037463, + "pages_per_minute": 27.792875142052477 }, "by_page": [ { - "page_id": "phys_00003", - "cer": 0.021764032073310423, - "wer": 0.12236286919831224 + "page_id": "phys_0001", + "cer": 0.1040582726326743, + "wer": 0.21656050955414013 }, { - "page_id": "phys_00005", - "cer": 0.13168724279835392, - "wer": 0.2603305785123967 + "page_id": "phys_0002", + "cer": 0.09331797235023041, + "wer": 0.20689655172413793 }, { - "page_id": "phys_00010", - "cer": 0.03969957081545064, - "wer": 0.1245136186770428 + "page_id": "phys_0003", + "cer": 0.05025996533795494, + "wer": 0.12290502793296089 } ] } }, { - "eval_workflow_id": "wf-datareichsanzeiger_many_ads-eval", - "label": "Workflow on data reichsanzeiger_many_ads", + "eval_workflow_id": "wf-dataweigel_gnothi02_1618-eval", + "label": "Workflow on data weigel_gnothi02_1618", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_many_ads.ocrd.zip", - "label": "GT workspace reichsanzeiger_many_ads" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/weigel_gnothi02_1618.ocrd.zip", + "label": "GT workspace weigel_gnothi02_1618" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_ocr.zip", - "label": "OCR workspace for reichsanzeiger_many_ads" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_ocr.zip", + "label": "OCR workspace for weigel_gnothi02_1618" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_evaluation.zip", - "label": "Evaluation workspace for reichsanzeiger_many_ads" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_evaluation.zip", + "label": "Evaluation workspace for weigel_gnothi02_1618" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -1709,113 +2582,8362 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Antiqua", + "Fraktur" ], - "publication_century": "1820-1939", + "publication_century": "1600-1700", "publication_decade": "", - "publication_year": "19th century", - "number_of_pages": 5, - "layout": "reichsanzeiger-gt" + "publication_year": "17th century", + "number_of_pages": 4, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 344.194331, - "cpu_time": 401.989378, - "cer_mean": 1.3340240550306781, - "cer_median": 0.7412443874278384, + "wall_time": 73.18760400000001, + "cpu_time": 107.389301, + "cer_mean": 0.07997168401781861, + "cer_median": 0.07157755385146221, "cer_range": [ - 0.3960932753867003, - 3.7402255639097746 + 0.043933054393305436, + 0.13279857397504458 ], - "cer_standard_deviation": 1.378192503303974, - "wer": 1.8653811990174032, - "pages_per_minute": 0.8716006423708356 + "cer_standard_deviation": 0.03821904307003693, + "wer": 0.18620716224746828, + "pages_per_minute": 3.2792438457201025 }, "by_page": [ { - "page_id": "P_1871_155_0279", - "cer": 0.3960932753867003, - "wer": 0.49564980967917344 - }, - { - "page_id": "P_1871_65_0045", - "cer": 0.7412443874278384, - "wer": 0.947814451382694 + "page_id": "phys_0001", + "cer": 0.13279857397504458, + "wer": 0.2717948717948718 }, { - "page_id": "P_1873_1_0017", - "cer": 0.5852251348300515, - "wer": 0.6788418708240535 + "page_id": "phys_0002", + "cer": 0.06286836935166994, + "wer": 0.20948616600790515 }, { - "page_id": "P_1881_1_0662", - "cer": 3.7402255639097746, - "wer": 5.734011627906977 + "page_id": "phys_0003", + "cer": 0.08028673835125448, + "wer": 0.16483516483516483 }, { - "page_id": "P_1883_55_0044", - "cer": 1.2073319135990264, - "wer": 1.4705882352941178 + "page_id": "phys_0004", + "cer": 0.043933054393305436, + "wer": 0.09871244635193133 + } + ] + } + }, + { + "eval_workflow_id": "wf-databenner_herrnhuterey04_1748-eval", + "label": "Workflow on data benner_herrnhuterey04_1748", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/benner_herrnhuterey04_1748.ocrd.zip", + "label": "GT workspace benner_herrnhuterey04_1748" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_ocr.zip", + "label": "OCR workspace for benner_herrnhuterey04_1748" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_evaluation.zip", + "label": "Evaluation workspace for benner_herrnhuterey04_1748" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 17.756173, + "cpu_time": 21.102642, + "cer_mean": 0.20433470587543373, + "cer_median": 0.1700173533179325, + "cer_range": [ + 0.04063745019920319, + 0.43666666666666665 + ], + "cer_standard_deviation": 0.19097881579976753, + "wer": 0.31449770246107817, + "pages_per_minute": 13.51642609023915 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.43666666666666665, + "wer": 0.583941605839416 + }, + { + "page_id": "phys_0002", + "cer": 0.04063745019920319, + "wer": 0.10344827586206896 + }, + { + "page_id": "phys_0003", + "cer": 0.05536912751677853, + "wer": 0.18435754189944134 + }, + { + "page_id": "phys_0004", + "cer": 0.28466557911908646, + "wer": 0.3862433862433862 + } + ] + } + }, + { + "eval_workflow_id": "wf-datacalvi_beutelschneider01_1627-eval", + "label": "Workflow on data calvi_beutelschneider01_1627", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/calvi_beutelschneider01_1627.ocrd.zip", + "label": "GT workspace calvi_beutelschneider01_1627" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_ocr.zip", + "label": "OCR workspace for calvi_beutelschneider01_1627" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/calvi_beutelschneider01_1627_evaluation.zip", + "label": "Evaluation workspace for calvi_beutelschneider01_1627" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 133, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 27.008717, + "cpu_time": 26.946455000000004, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 6.664515015652168 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataalberti_pictura_1540-eval", + "label": "Workflow on data alberti_pictura_1540", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/alberti_pictura_1540.ocrd.zip", + "label": "GT workspace alberti_pictura_1540" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_ocr.zip", + "label": "OCR workspace for alberti_pictura_1540" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_evaluation.zip", + "label": "Evaluation workspace for alberti_pictura_1540" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 14.196548, + "cpu_time": 13.619762999999999, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 12.67913861876845 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0008", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0009", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datakistler_kraeuter_1500-eval", + "label": "Workflow on data kistler_kraeuter_1500", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/kistler_kraeuter_1500.ocrd.zip", + "label": "GT workspace kistler_kraeuter_1500" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_ocr.zip", + "label": "OCR workspace for kistler_kraeuter_1500" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_evaluation.zip", + "label": "Evaluation workspace for kistler_kraeuter_1500" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.076052, + "cpu_time": 8.821135, + "cer_mean": 0.1809783660358373, + "cer_median": 0.1809783660358373, + "cer_range": [ + 0.13479623824451412, + 0.2271604938271605 + ], + "cer_standard_deviation": 0.06531139146173669, + "wer": 0.408373786407767, + "pages_per_minute": 16.95860912271419 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.2271604938271605, + "wer": 0.441747572815534 + }, + { + "page_id": "phys_0021", + "cer": 0.13479623824451412, + "wer": 0.375 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_tables-eval", + "label": "Workflow on data reichsanzeiger_tables", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_tables.ocrd.zip", + "label": "GT workspace reichsanzeiger_tables" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_ocr.zip", + "label": "OCR workspace for reichsanzeiger_tables" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_tables" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 437.353829, + "cpu_time": 575.346313, + "cer_mean": 1.479972651687655, + "cer_median": 1.331401349741961, + "cer_range": [ + 1.090097148229395, + 1.9778823058446757 + ], + "cer_standard_deviation": 0.3854879135899152, + "wer": 1.7910299442503619, + "pages_per_minute": 0.6859434629529676 + }, + "by_page": [ + { + "page_id": "P_1877_7_0059", + "cer": 1.090097148229395, + "wer": 1.4218399401645474 + }, + { + "page_id": "P_1883_55_0044", + "cer": 1.2073319135990264, + "wer": 1.4705882352941178 + }, + { + "page_id": "P_1929_250_0019", + "cer": 1.331401349741961, + "wer": 1.7407221664994985 + }, + { + "page_id": "P_1932_300_0488", + "cer": 1.9778823058446757, + "wer": 2.349493487698987 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.7931505410232167, + "wer": 1.9725058915946583 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_random-eval", + "label": "Workflow on data reichsanzeiger_random", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_random.ocrd.zip", + "label": "GT workspace reichsanzeiger_random" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_ocr.zip", + "label": "OCR workspace for reichsanzeiger_random" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_random" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 6, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 577.257552, + "cpu_time": 763.98953, + "cer_mean": 1.1094634128044334, + "cer_median": 0.8993673021608106, + "cer_range": [ + 0.30019453260980855, + 1.9778823058446757 + ], + "cer_standard_deviation": 0.645494993792005, + "wer": 1.3304151477151094, + "pages_per_minute": 0.6236384413728727 + }, + "by_page": [ + { + "page_id": "P_1879_45_0344", + "cer": 0.9843462873477166, + "wer": 1.1759884281581485 + }, + { + "page_id": "P_1885_5_0055", + "cer": 0.30019453260980855, + "wer": 0.40127817019845274 + }, + { + "page_id": "P_1889_1_0018", + "cer": 0.7868184930272782, + "wer": 0.9704142011834319 + }, + { + "page_id": "P_1891_33_0452", + "cer": 0.8143883169739047, + "wer": 1.112810707456979 + }, + { + "page_id": "P_1932_300_0488", + "cer": 1.9778823058446757, + "wer": 2.349493487698987 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.7931505410232167, + "wer": 1.9725058915946583 + } + ] + } + }, + { + "eval_workflow_id": "wf-datarollenhagen_reysen_1603-eval", + "label": "Workflow on data rollenhagen_reysen_1603", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/rollenhagen_reysen_1603.ocrd.zip", + "label": "GT workspace rollenhagen_reysen_1603" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_ocr.zip", + "label": "OCR workspace for rollenhagen_reysen_1603" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_evaluation.zip", + "label": "Evaluation workspace for rollenhagen_reysen_1603" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 60.776995, + "cpu_time": 83.81908, + "cer_mean": 0.20072324583561607, + "cer_median": 0.19980314960629922, + "cer_range": [ + 0.19198790627362056, + 0.21037868162692847 + ], + "cer_standard_deviation": 0.009229847632943988, + "wer": 0.34944871614736, + "pages_per_minute": 2.9616469192002666 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.21037868162692847, + "wer": 0.4298245614035088 + }, + { + "page_id": "phys_0002", + "cer": 0.19980314960629922, + "wer": 0.3790849673202614 + }, + { + "page_id": "phys_0003", + "cer": 0.19198790627362056, + "wer": 0.23943661971830985 + } + ] + } + }, + { + "eval_workflow_id": "wf-datajusti_abhandlung01_1758-eval", + "label": "Workflow on data justi_abhandlung01_1758", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/justi_abhandlung01_1758.ocrd.zip", + "label": "GT workspace justi_abhandlung01_1758" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_ocr.zip", + "label": "OCR workspace for justi_abhandlung01_1758" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_evaluation.zip", + "label": "Evaluation workspace for justi_abhandlung01_1758" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 12.158114, + "cpu_time": 15.372055, + "cer_mean": 0.12942563348778183, + "cer_median": 0.13278777025964683, + "cer_range": [ + 0.09893550407013149, + 0.15319148936170213 + ], + "cer_standard_deviation": 0.022501323128153037, + "wer": 0.19483806092245076, + "pages_per_minute": 19.73990373835942 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.13375130616509928, + "wer": 0.2 + }, + { + "page_id": "phys_0002", + "cer": 0.15319148936170213, + "wer": 0.22916666666666666 + }, + { + "page_id": "phys_0003", + "cer": 0.09893550407013149, + "wer": 0.12992125984251968 + }, + { + "page_id": "phys_0004", + "cer": 0.1318242343541944, + "wer": 0.22026431718061673 + } + ] + } + }, + { + "eval_workflow_id": "wf-databuerger_gedichte_1778-eval", + "label": "Workflow on data buerger_gedichte_1778", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/buerger_gedichte_1778.ocrd.zip", + "label": "GT workspace buerger_gedichte_1778" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_ocr.zip", + "label": "OCR workspace for buerger_gedichte_1778" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_evaluation.zip", + "label": "Evaluation workspace for buerger_gedichte_1778" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 15.012823999999998, + "cpu_time": 23.767021000000003, + "cer_mean": 0.22786023044476886, + "cer_median": 0.22786023044476886, + "cer_range": [ + 0.053811659192825115, + 0.4019088016967126 + ], + "cer_standard_deviation": 0.24614184997615882, + "wer": 0.35125551082997897, + "pages_per_minute": 7.993166375626599 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4019088016967126, + "wer": 0.5673758865248227 + }, + { + "page_id": "phys_0002", + "cer": 0.053811659192825115, + "wer": 0.13513513513513514 + } + ] + } + }, + { + "eval_workflow_id": "wf-datatrota_mordtbrenner_1540-eval", + "label": "Workflow on data trota_mordtbrenner_1540", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/trota_mordtbrenner_1540.ocrd.zip", + "label": "GT workspace trota_mordtbrenner_1540" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_ocr.zip", + "label": "OCR workspace for trota_mordtbrenner_1540" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_evaluation.zip", + "label": "Evaluation workspace for trota_mordtbrenner_1540" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 16.12696, + "cpu_time": 15.415920999999999, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 7.440956014028682 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datasilesius_seelenlust01_1657-eval", + "label": "Workflow on data silesius_seelenlust01_1657", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/silesius_seelenlust01_1657.ocrd.zip", + "label": "GT workspace silesius_seelenlust01_1657" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_ocr.zip", + "label": "OCR workspace for silesius_seelenlust01_1657" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_evaluation.zip", + "label": "Evaluation workspace for silesius_seelenlust01_1657" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 48.011177999999994, + "cpu_time": 74.53987599999999, + "cer_mean": 0.16456973001701142, + "cer_median": 0.11229135053110774, + "cer_range": [ + 0.05917159763313609, + 0.34146341463414637 + ], + "cer_standard_deviation": 0.11554307430913685, + "wer": 0.34312458192490203, + "pages_per_minute": 6.248544870113373 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.05917159763313609, + "wer": 0.2463768115942029 + }, + { + "page_id": "phys_0002", + "cer": 0.0913884007029877, + "wer": 0.28125 + }, + { + "page_id": "phys_0003", + "cer": 0.34146341463414637, + "wer": 0.5774647887323944 + }, + { + "page_id": "phys_0004", + "cer": 0.11229135053110774, + "wer": 0.1693548387096774 + }, + { + "page_id": "phys_0005", + "cer": 0.21853388658367912, + "wer": 0.4411764705882353 + } + ] + } + }, + { + "eval_workflow_id": "wf-datalessing_menschengeschlecht_1780-eval", + "label": "Workflow on data lessing_menschengeschlecht_1780", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/lessing_menschengeschlecht_1780.ocrd.zip", + "label": "GT workspace lessing_menschengeschlecht_1780" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_ocr.zip", + "label": "OCR workspace for lessing_menschengeschlecht_1780" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_evaluation.zip", + "label": "Evaluation workspace for lessing_menschengeschlecht_1780" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 5.051292, + "cpu_time": 4.899471, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": null, + "wer": 1.0, + "pages_per_minute": 11.87814919430514 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataweigel_gnothi02_1618-eval", + "label": "Workflow on data weigel_gnothi02_1618", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/weigel_gnothi02_1618.ocrd.zip", + "label": "GT workspace weigel_gnothi02_1618" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_ocr.zip", + "label": "OCR workspace for weigel_gnothi02_1618" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_evaluation.zip", + "label": "Evaluation workspace for weigel_gnothi02_1618" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 4, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 25.781608, + "cpu_time": 29.469248, + "cer_mean": 0.11399763421865425, + "cer_median": 0.09987627660053089, + "cer_range": [ + 0.07531106745252128, + 0.18092691622103388 + ], + "cer_standard_deviation": 0.047514032403792344, + "wer": 0.19377061794956923, + "pages_per_minute": 9.308961644285338 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.18092691622103388, + "wer": 0.3282051282051282 + }, + { + "page_id": "phys_0002", + "cer": 0.07531106745252128, + "wer": 0.15019762845849802 + }, + { + "page_id": "phys_0003", + "cer": 0.11397849462365592, + "wer": 0.15934065934065933 + }, + { + "page_id": "phys_0004", + "cer": 0.08577405857740586, + "wer": 0.13733905579399142 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataestor_rechtsgelehrsamkeit02_1758-eval", + "label": "Workflow on data estor_rechtsgelehrsamkeit02_1758", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/estor_rechtsgelehrsamkeit02_1758.ocrd.zip", + "label": "GT workspace estor_rechtsgelehrsamkeit02_1758" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_ocr.zip", + "label": "OCR workspace for estor_rechtsgelehrsamkeit02_1758" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_evaluation.zip", + "label": "Evaluation workspace for estor_rechtsgelehrsamkeit02_1758" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 17.466218, + "cpu_time": 20.755792, + "cer_mean": 0.11589408928020027, + "cer_median": 0.10084584323499293, + "cer_range": [ + 0.05331088664421998, + 0.2085737840065952 + ], + "cer_standard_deviation": 0.07259145757108061, + "wer": 0.20102650242627845, + "pages_per_minute": 13.740810975793385 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.06310013717421124, + "wer": 0.11403508771929824 + }, + { + "page_id": "phys_0002", + "cer": 0.13859154929577464, + "wer": 0.22340425531914893 + }, + { + "page_id": "phys_0003", + "cer": 0.05331088664421998, + "wer": 0.13333333333333333 + }, + { + "page_id": "phys_0004", + "cer": 0.2085737840065952, + "wer": 0.3333333333333333 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataheyden_paedono_1548-eval", + "label": "Workflow on data heyden_paedono_1548", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/heyden_paedono_1548.ocrd.zip", + "label": "GT workspace heyden_paedono_1548" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_ocr.zip", + "label": "OCR workspace for heyden_paedono_1548" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_evaluation.zip", + "label": "Evaluation workspace for heyden_paedono_1548" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.339199, + "cpu_time": 9.278232, + "cer_mean": 0.07452119312897007, + "cer_median": 0.0629800307219662, + "cer_range": [ + 0.037037037037037035, + 0.12354651162790697 + ], + "cer_standard_deviation": 0.044394494261965886, + "wer": 0.22683890077340793, + "pages_per_minute": 24.525837220110805 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.0629800307219662, + "wer": 0.21359223300970873 + }, + { + "page_id": "phys_0013", + "cer": 0.12354651162790697, + "wer": 0.33980582524271846 + }, + { + "page_id": "phys_0014", + "cer": 0.037037037037037035, + "wer": 0.1271186440677966 + } + ] + } + }, + { + "eval_workflow_id": "wf-databuerger_gedichte_1778-eval", + "label": "Workflow on data buerger_gedichte_1778", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/buerger_gedichte_1778.ocrd.zip", + "label": "GT workspace buerger_gedichte_1778" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_ocr.zip", + "label": "OCR workspace for buerger_gedichte_1778" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_evaluation.zip", + "label": "Evaluation workspace for buerger_gedichte_1778" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 5.502369, + "cpu_time": 6.645983, + "cer_mean": 0.10672693293515115, + "cer_median": 0.10672693293515115, + "cer_range": [ + 0.04484304932735426, + 0.16861081654294804 + ], + "cer_standard_deviation": 0.08751702749046443, + "wer": 0.23135901859306116, + "pages_per_minute": 21.80878817832828 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.16861081654294804, + "wer": 0.3546099290780142 + }, + { + "page_id": "phys_0002", + "cer": 0.04484304932735426, + "wer": 0.10810810810810811 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataluther_auszlegunge_1520-eval", + "label": "Workflow on data luther_auszlegunge_1520", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luther_auszlegunge_1520.ocrd.zip", + "label": "GT workspace luther_auszlegunge_1520" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_ocr.zip", + "label": "OCR workspace for luther_auszlegunge_1520" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_evaluation.zip", + "label": "Evaluation workspace for luther_auszlegunge_1520" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 18.242696, + "cpu_time": 17.645239, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 6.577975097540407 + }, + "by_page": [ + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0029", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-databohse_helicon_1696-eval", + "label": "Workflow on data bohse_helicon_1696", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bohse_helicon_1696.ocrd.zip", + "label": "GT workspace bohse_helicon_1696" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_ocr.zip", + "label": "OCR workspace for bohse_helicon_1696" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_evaluation.zip", + "label": "Evaluation workspace for bohse_helicon_1696" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 51.137511999999994, + "cpu_time": 78.57060399999999, + "cer_mean": 0.17535728446779447, + "cer_median": 0.14864864864864866, + "cer_range": [ + 0.08232118758434548, + 0.35185185185185186 + ], + "cer_standard_deviation": 0.1049688941973522, + "wer": 0.3836611930189032, + "pages_per_minute": 5.866534922543749 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.35185185185185186, + "wer": 0.7021276595744681 + }, + { + "page_id": "phys_0002", + "cer": 0.14864864864864866, + "wer": 0.35185185185185186 + }, + { + "page_id": "phys_0003", + "cer": 0.1781609195402299, + "wer": 0.381294964028777 + }, + { + "page_id": "phys_0004", + "cer": 0.08232118758434548, + "wer": 0.24074074074074073 + }, + { + "page_id": "phys_0005", + "cer": 0.11580381471389646, + "wer": 0.2422907488986784 + } + ] + } + }, + { + "eval_workflow_id": "wf-datalessing_menschengeschlecht_1780-eval", + "label": "Workflow on data lessing_menschengeschlecht_1780", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/lessing_menschengeschlecht_1780.ocrd.zip", + "label": "GT workspace lessing_menschengeschlecht_1780" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_ocr.zip", + "label": "OCR workspace for lessing_menschengeschlecht_1780" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_evaluation.zip", + "label": "Evaluation workspace for lessing_menschengeschlecht_1780" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.084619, + "cpu_time": 10.402543999999999, + "cer_mean": 0.0199501246882793, + "cer_median": 0.0199501246882793, + "cer_range": [ + 0.0199501246882793, + 0.0199501246882793 + ], + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 8.469051052710103 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0199501246882793, + "wer": 0.09836065573770492 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_random-eval", + "label": "Workflow on data reichsanzeiger_random", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_random.ocrd.zip", + "label": "GT workspace reichsanzeiger_random" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_ocr.zip", + "label": "OCR workspace for reichsanzeiger_random" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_random" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 6, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 1758.9687809999998, + "cpu_time": 1685.08457, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 0.20466537205699278 + }, + "by_page": [ + { + "page_id": "P_1879_45_0344", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1885_5_0055", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1889_1_0018", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1891_33_0452", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1932_300_0488", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datasilesius_seelenlust01_1657-eval", + "label": "Workflow on data silesius_seelenlust01_1657", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/silesius_seelenlust01_1657.ocrd.zip", + "label": "GT workspace silesius_seelenlust01_1657" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_ocr.zip", + "label": "OCR workspace for silesius_seelenlust01_1657" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_evaluation.zip", + "label": "Evaluation workspace for silesius_seelenlust01_1657" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 11.083143, + "cpu_time": 14.729302, + "cer_mean": 0.3007055286105995, + "cer_median": 0.2951219512195122, + "cer_range": [ + 0.19271623672230653, + 0.44970414201183434 + ], + "cer_standard_deviation": 0.10657123719012947, + "wer": 0.5174305966287508, + "pages_per_minute": 27.068134012166045 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.44970414201183434, + "wer": 0.7101449275362319 + }, + { + "page_id": "phys_0002", + "cer": 0.20913884007029876, + "wer": 0.5104166666666666 + }, + { + "page_id": "phys_0003", + "cer": 0.2951219512195122, + "wer": 0.4647887323943662 + }, + { + "page_id": "phys_0004", + "cer": 0.19271623672230653, + "wer": 0.3870967741935484 + }, + { + "page_id": "phys_0005", + "cer": 0.35684647302904565, + "wer": 0.5147058823529411 + } + ] + } + }, + { + "eval_workflow_id": "wf-datasilesius_seelenlust01_1657-eval", + "label": "Workflow on data silesius_seelenlust01_1657", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/silesius_seelenlust01_1657.ocrd.zip", + "label": "GT workspace silesius_seelenlust01_1657" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_ocr.zip", + "label": "OCR workspace for silesius_seelenlust01_1657" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/silesius_seelenlust01_1657_evaluation.zip", + "label": "Evaluation workspace for silesius_seelenlust01_1657" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 25.224073, + "cpu_time": 24.197834, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 11.89340040365408 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0005", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataheyden_paedono_1548-eval", + "label": "Workflow on data heyden_paedono_1548", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/heyden_paedono_1548.ocrd.zip", + "label": "GT workspace heyden_paedono_1548" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_ocr.zip", + "label": "OCR workspace for heyden_paedono_1548" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_evaluation.zip", + "label": "Evaluation workspace for heyden_paedono_1548" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 23.972654, + "cpu_time": 38.630559, + "cer_mean": 0.06721136853878373, + "cer_median": 0.055232558139534885, + "cer_range": [ + 0.03580246913580247, + 0.11059907834101383 + ], + "cer_standard_deviation": 0.038810463938030185, + "wer": 0.1847677033624047, + "pages_per_minute": 7.508555373134739 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.11059907834101383, + "wer": 0.23300970873786409 + }, + { + "page_id": "phys_0013", + "cer": 0.055232558139534885, + "wer": 0.1941747572815534 + }, + { + "page_id": "phys_0014", + "cer": 0.03580246913580247, + "wer": 0.1271186440677966 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataestor_rechtsgelehrsamkeit02_1758-eval", + "label": "Workflow on data estor_rechtsgelehrsamkeit02_1758", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/estor_rechtsgelehrsamkeit02_1758.ocrd.zip", + "label": "GT workspace estor_rechtsgelehrsamkeit02_1758" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_ocr.zip", + "label": "OCR workspace for estor_rechtsgelehrsamkeit02_1758" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_evaluation.zip", + "label": "Evaluation workspace for estor_rechtsgelehrsamkeit02_1758" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 53.64139899999999, + "cpu_time": 84.639989, + "cer_mean": 0.2886254685112176, + "cer_median": 0.2965426192770805, + "cer_range": [ + 0.07856341189674523, + 0.4828532235939643 + ], + "cer_standard_deviation": 0.204671750373206, + "wer": 0.4238352166683633, + "pages_per_minute": 4.474156238915395 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4828532235939643, + "wer": 0.5614035087719298 + }, + { + "page_id": "phys_0002", + "cer": 0.14873239436619717, + "wer": 0.2695035460992908 + }, + { + "page_id": "phys_0003", + "cer": 0.07856341189674523, + "wer": 0.22807017543859648 + }, + { + "page_id": "phys_0004", + "cer": 0.4443528441879637, + "wer": 0.6363636363636364 + } + ] + } + }, + { + "eval_workflow_id": "wf-datalessing_menschengeschlecht_1780-eval", + "label": "Workflow on data lessing_menschengeschlecht_1780", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/lessing_menschengeschlecht_1780.ocrd.zip", + "label": "GT workspace lessing_menschengeschlecht_1780" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_ocr.zip", + "label": "OCR workspace for lessing_menschengeschlecht_1780" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_evaluation.zip", + "label": "Evaluation workspace for lessing_menschengeschlecht_1780" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 1.7175, + "cpu_time": 2.152713, + "cer_mean": 0.02493765586034913, + "cer_median": 0.02493765586034913, + "cer_range": [ + 0.02493765586034913, + 0.02493765586034913 + ], + "cer_standard_deviation": null, + "wer": 0.09836065573770492, + "pages_per_minute": 34.93449781659388 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.02493765586034913, + "wer": 0.09836065573770492 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataeuler_rechenkunst01_1738-eval", + "label": "Workflow on data euler_rechenkunst01_1738", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/euler_rechenkunst01_1738.ocrd.zip", + "label": "GT workspace euler_rechenkunst01_1738" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_ocr.zip", + "label": "OCR workspace for euler_rechenkunst01_1738" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_evaluation.zip", + "label": "Evaluation workspace for euler_rechenkunst01_1738" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 201, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 6, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 44.243076, + "cpu_time": 43.668200000000006, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 8.136866433066272 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0005", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0006", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-databuerger_gedichte_1778-eval", + "label": "Workflow on data buerger_gedichte_1778", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/buerger_gedichte_1778.ocrd.zip", + "label": "GT workspace buerger_gedichte_1778" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_ocr.zip", + "label": "OCR workspace for buerger_gedichte_1778" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/buerger_gedichte_1778_evaluation.zip", + "label": "Evaluation workspace for buerger_gedichte_1778" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 2, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 9.932442, + "cpu_time": 9.373615, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 12.081621015254857 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datann_besuch_1780-eval", + "label": "Workflow on data nn_besuch_1780", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/nn_besuch_1780.ocrd.zip", + "label": "GT workspace nn_besuch_1780" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_ocr.zip", + "label": "OCR workspace for nn_besuch_1780" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_evaluation.zip", + "label": "Evaluation workspace for nn_besuch_1780" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 30.565247, + "cpu_time": 29.36571, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 7.852054982575472 + }, + "by_page": [ + { + "page_id": "phys_00001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_00002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_00003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_00004", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataluther_auszlegunge_1520-eval", + "label": "Workflow on data luther_auszlegunge_1520", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luther_auszlegunge_1520.ocrd.zip", + "label": "GT workspace luther_auszlegunge_1520" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_ocr.zip", + "label": "OCR workspace for luther_auszlegunge_1520" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luther_auszlegunge_1520_evaluation.zip", + "label": "Evaluation workspace for luther_auszlegunge_1520" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 9.141584, + "cpu_time": 10.972897, + "cer_mean": 0.24855845660550213, + "cer_median": 0.24855845660550213, + "cer_range": [ + 0.07865168539325842, + 0.4184652278177458 + ], + "cer_standard_deviation": 0.2402844601873776, + "wer": 0.37300307219662054, + "pages_per_minute": 13.126827910786577 + }, + "by_page": [ + { + "page_id": "phys_0003", + "cer": 0.4184652278177458, + "wer": 0.48333333333333334 + }, + { + "page_id": "phys_0029", + "cer": 0.07865168539325842, + "wer": 0.2626728110599078 + } + ] + } + }, + { + "eval_workflow_id": "wf-datatrota_mordtbrenner_1540-eval", + "label": "Workflow on data trota_mordtbrenner_1540", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/trota_mordtbrenner_1540.ocrd.zip", + "label": "GT workspace trota_mordtbrenner_1540" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_ocr.zip", + "label": "OCR workspace for trota_mordtbrenner_1540" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_evaluation.zip", + "label": "Evaluation workspace for trota_mordtbrenner_1540" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 24.812708, + "cpu_time": 34.840120999999996, + "cer_mean": 0.07499762380001901, + "cer_median": 0.07499762380001901, + "cer_range": [ + 0.0658682634730539, + 0.08412698412698413 + ], + "cer_standard_deviation": 0.012910865190184943, + "wer": 0.15174388339406558, + "pages_per_minute": 4.836231498794891 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0658682634730539, + "wer": 0.18584070796460178 + }, + { + "page_id": "phys_0002", + "cer": 0.08412698412698413, + "wer": 0.11764705882352941 + } + ] + } + }, + { + "eval_workflow_id": "wf-databenner_herrnhuterey04_1748-eval", + "label": "Workflow on data benner_herrnhuterey04_1748", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/benner_herrnhuterey04_1748.ocrd.zip", + "label": "GT workspace benner_herrnhuterey04_1748" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_ocr.zip", + "label": "OCR workspace for benner_herrnhuterey04_1748" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/benner_herrnhuterey04_1748_evaluation.zip", + "label": "Evaluation workspace for benner_herrnhuterey04_1748" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 37.143108999999995, + "cpu_time": 35.83222599999999, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 6.46149464763437 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datablumenbach_anatomie_1805-eval", + "label": "Workflow on data blumenbach_anatomie_1805", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/blumenbach_anatomie_1805.ocrd.zip", + "label": "GT workspace blumenbach_anatomie_1805" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_ocr.zip", + "label": "OCR workspace for blumenbach_anatomie_1805" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_evaluation.zip", + "label": "Evaluation workspace for blumenbach_anatomie_1805" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 11.765522, + "cpu_time": 14.113986, + "cer_mean": 0.08328200324172261, + "cer_median": 0.08736842105263158, + "cer_range": [ + 0.04055496264674493, + 0.12192262602579132 + ], + "cer_standard_deviation": 0.04083746158658049, + "wer": 0.23519468186134854, + "pages_per_minute": 15.298938712621506 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.08736842105263158, + "wer": 0.22666666666666666 + }, + { + "page_id": "phys_0002", + "cer": 0.04055496264674493, + "wer": 0.14814814814814814 + }, + { + "page_id": "phys_0003", + "cer": 0.12192262602579132, + "wer": 0.33076923076923076 + } + ] + } + }, + { + "eval_workflow_id": "wf-datarollenhagen_reysen_1603-eval", + "label": "Workflow on data rollenhagen_reysen_1603", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/rollenhagen_reysen_1603.ocrd.zip", + "label": "GT workspace rollenhagen_reysen_1603" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_ocr.zip", + "label": "OCR workspace for rollenhagen_reysen_1603" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_evaluation.zip", + "label": "Evaluation workspace for rollenhagen_reysen_1603" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 14.252525, + "cpu_time": 16.514329, + "cer_mean": 0.21046220070684576, + "cer_median": 0.16929133858267717, + "cer_range": [ + 0.14512471655328799, + 0.3169705469845722 + ], + "cer_standard_deviation": 0.093027024434784, + "wer": 0.3190752126565147, + "pages_per_minute": 12.629341116749487 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.3169705469845722, + "wer": 0.4649122807017544 + }, + { + "page_id": "phys_0002", + "cer": 0.16929133858267717, + "wer": 0.28104575163398693 + }, + { + "page_id": "phys_0003", + "cer": 0.14512471655328799, + "wer": 0.2112676056338028 + } + ] + } + }, + { + "eval_workflow_id": "wf-databohse_helicon_1696-eval", + "label": "Workflow on data bohse_helicon_1696", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bohse_helicon_1696.ocrd.zip", + "label": "GT workspace bohse_helicon_1696" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_ocr.zip", + "label": "OCR workspace for bohse_helicon_1696" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bohse_helicon_1696_evaluation.zip", + "label": "Evaluation workspace for bohse_helicon_1696" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 315, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 5, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 28.21906, + "cpu_time": 27.394986, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 10.63111244669383 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0005", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataarnimb_goethe03_1835-eval", + "label": "Workflow on data arnimb_goethe03_1835", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/arnimb_goethe03_1835.ocrd.zip", + "label": "GT workspace arnimb_goethe03_1835" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_ocr.zip", + "label": "OCR workspace for arnimb_goethe03_1835" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_evaluation.zip", + "label": "Evaluation workspace for arnimb_goethe03_1835" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Fraktur" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 1, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 4.205169, + "cpu_time": 5.02776, + "cer_mean": 0.004721435316336166, + "cer_median": 0.004721435316336166, + "cer_range": [ + 0.004721435316336166, + 0.004721435316336166 + ], + "cer_standard_deviation": null, + "wer": 0.015873015873015872, + "pages_per_minute": 14.268154264430276 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.004721435316336166, + "wer": 0.015873015873015872 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataluz_blitz_1784-eval", + "label": "Workflow on data luz_blitz_1784", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luz_blitz_1784.ocrd.zip", + "label": "GT workspace luz_blitz_1784" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_ocr.zip", + "label": "OCR workspace for luz_blitz_1784" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_evaluation.zip", + "label": "Evaluation workspace for luz_blitz_1784" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 55.558143, + "cpu_time": 84.72784899999999, + "cer_mean": 0.02547899799369996, + "cer_median": 0.027204160076875337, + "cer_range": [ + 0.010332950631458095, + 0.03717472118959108 + ], + "cer_standard_deviation": 0.011266997500136374, + "wer": 0.07714749104131867, + "pages_per_minute": 4.319798809690237 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.03717472118959108, + "wer": 0.10256410256410256 + }, + { + "page_id": "phys_0002", + "cer": 0.025119617224880382, + "wer": 0.09565217391304348 + }, + { + "page_id": "phys_0003", + "cer": 0.029288702928870293, + "wer": 0.07924528301886792 + }, + { + "page_id": "phys_0004", + "cer": 0.010332950631458095, + "wer": 0.0311284046692607 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataheyden_paedono_1548-eval", + "label": "Workflow on data heyden_paedono_1548", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/heyden_paedono_1548.ocrd.zip", + "label": "GT workspace heyden_paedono_1548" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_ocr.zip", + "label": "OCR workspace for heyden_paedono_1548" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_evaluation.zip", + "label": "Evaluation workspace for heyden_paedono_1548" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 13.685353, + "cpu_time": 13.153721, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 13.15274805114636 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0013", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0014", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datarollenhagen_reysen_1603-eval", + "label": "Workflow on data rollenhagen_reysen_1603", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/rollenhagen_reysen_1603.ocrd.zip", + "label": "GT workspace rollenhagen_reysen_1603" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_ocr.zip", + "label": "OCR workspace for rollenhagen_reysen_1603" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/rollenhagen_reysen_1603_evaluation.zip", + "label": "Evaluation workspace for rollenhagen_reysen_1603" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 34.360988, + "cpu_time": 33.38291099999999, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 5.238498962835411 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-databallenstedt_delatio_1777-eval", + "label": "Workflow on data ballenstedt_delatio_1777", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/ballenstedt_delatio_1777.ocrd.zip", + "label": "GT workspace ballenstedt_delatio_1777" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_ocr.zip", + "label": "OCR workspace for ballenstedt_delatio_1777" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_evaluation.zip", + "label": "Evaluation workspace for ballenstedt_delatio_1777" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 56.39011899999999, + "cpu_time": 88.809237, + "cer_mean": 0.02808165896942696, + "cer_median": 0.02821869488536155, + "cer_range": [ + 0.020618556701030927, + 0.03540772532188841 + ], + "cer_standard_deviation": 0.007395536576593408, + "wer": 0.13838629881265765, + "pages_per_minute": 3.1920485927685314 + }, + "by_page": [ + { + "page_id": "phys_00003", + "cer": 0.020618556701030927, + "wer": 0.11392405063291139 + }, + { + "page_id": "phys_00005", + "cer": 0.02821869488536155, + "wer": 0.16115702479338842 + }, + { + "page_id": "phys_00010", + "cer": 0.03540772532188841, + "wer": 0.14007782101167315 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataalberti_pictura_1540-eval", + "label": "Workflow on data alberti_pictura_1540", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/alberti_pictura_1540.ocrd.zip", + "label": "GT workspace alberti_pictura_1540" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_ocr.zip", + "label": "OCR workspace for alberti_pictura_1540" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_evaluation.zip", + "label": "Evaluation workspace for alberti_pictura_1540" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 27.849722, + "cpu_time": 47.006445, + "cer_mean": 0.10363204260868718, + "cer_median": 0.11836734693877551, + "cer_range": [ + 0.046632124352331605, + 0.1458966565349544 + ], + "cer_standard_deviation": 0.05124654849483992, + "wer": 0.2145458690579216, + "pages_per_minute": 6.463260207767963 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.046632124352331605, + "wer": 0.1652892561983471 + }, + { + "page_id": "phys_0008", + "cer": 0.1458966565349544, + "wer": 0.2670807453416149 + }, + { + "page_id": "phys_0009", + "cer": 0.11836734693877551, + "wer": 0.2112676056338028 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataluz_blitz_1784-eval", + "label": "Workflow on data luz_blitz_1784", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luz_blitz_1784.ocrd.zip", + "label": "GT workspace luz_blitz_1784" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_ocr.zip", + "label": "OCR workspace for luz_blitz_1784" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_evaluation.zip", + "label": "Evaluation workspace for luz_blitz_1784" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 31.967221, + "cpu_time": 30.880905, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 7.50769045579533 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-databernd_lebensbeschreibung_1738-eval", + "label": "Workflow on data bernd_lebensbeschreibung_1738", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/bernd_lebensbeschreibung_1738.ocrd.zip", + "label": "GT workspace bernd_lebensbeschreibung_1738" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_ocr.zip", + "label": "OCR workspace for bernd_lebensbeschreibung_1738" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_evaluation.zip", + "label": "Evaluation workspace for bernd_lebensbeschreibung_1738" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 16.878657, + "cpu_time": 16.269429000000002, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 10.664355582319137 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_random-eval", + "label": "Workflow on data reichsanzeiger_random", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_random.ocrd.zip", + "label": "GT workspace reichsanzeiger_random" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_ocr.zip", + "label": "OCR workspace for reichsanzeiger_random" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_random" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 6, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 3106.4616029999997, + "cpu_time": 4679.73138, + "cer_mean": 1.0169660871444868, + "cer_median": 0.7215611935116096, + "cer_range": [ + 0.18319367967440747, + 2.105984787830264 + ], + "cer_standard_deviation": 0.8374705743292857, + "wer": 1.3445096706748612, + "pages_per_minute": 0.1158874777825477 + }, + "by_page": [ + { + "page_id": "P_1879_45_0344", + "cer": 0.9179881576260804, + "wer": 1.1783992285438765 + }, + { + "page_id": "P_1885_5_0055", + "cer": 0.37306747209992835, + "wer": 0.5381769256643122 + }, + { + "page_id": "P_1889_1_0018", + "cer": 0.5251342293971388, + "wer": 0.8080473372781065 + }, + { + "page_id": "P_1891_33_0452", + "cer": 0.18319367967440747, + "wer": 0.4236050756127238 + }, + { + "page_id": "P_1932_300_0488", + "cer": 2.105984787830264, + "wer": 2.814037626628075 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.9964281962391008, + "wer": 2.304791830322074 + } + ] + } + }, + { + "eval_workflow_id": "wf-datatrota_mordtbrenner_1540-eval", + "label": "Workflow on data trota_mordtbrenner_1540", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/trota_mordtbrenner_1540.ocrd.zip", + "label": "GT workspace trota_mordtbrenner_1540" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_ocr.zip", + "label": "OCR workspace for trota_mordtbrenner_1540" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_evaluation.zip", + "label": "Evaluation workspace for trota_mordtbrenner_1540" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 6.38229, + "cpu_time": 7.811183, + "cer_mean": 0.043071000855431994, + "cer_median": 0.043071000855431994, + "cer_range": [ + 0.014285714285714285, + 0.0718562874251497 + ], + "cer_standard_deviation": 0.04070854266369089, + "wer": 0.10714905431199029, + "pages_per_minute": 18.802028738900926 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.0718562874251497, + "wer": 0.19469026548672566 + }, + { + "page_id": "phys_0002", + "cer": 0.014285714285714285, + "wer": 0.0196078431372549 + } + ] + } + }, + { + "eval_workflow_id": "wf-datajusti_abhandlung01_1758-eval", + "label": "Workflow on data justi_abhandlung01_1758", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/justi_abhandlung01_1758.ocrd.zip", + "label": "GT workspace justi_abhandlung01_1758" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_ocr.zip", + "label": "OCR workspace for justi_abhandlung01_1758" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/justi_abhandlung01_1758_evaluation.zip", + "label": "Evaluation workspace for justi_abhandlung01_1758" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 27.452519000000002, + "cpu_time": 26.471052, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 8.742367139423527 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datahuebner_handbuch_1696-eval", + "label": "Workflow on data huebner_handbuch_1696", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/huebner_handbuch_1696.ocrd.zip", + "label": "GT workspace huebner_handbuch_1696" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_ocr.zip", + "label": "OCR workspace for huebner_handbuch_1696" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_evaluation.zip", + "label": "Evaluation workspace for huebner_handbuch_1696" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 48.470510000000004, + "cpu_time": 66.43220300000002, + "cer_mean": 0.20641950275814583, + "cer_median": 0.1099236641221374, + "cer_range": [ + 0.09132841328413284, + 0.4180064308681672 + ], + "cer_standard_deviation": 0.18347538513029096, + "wer": 0.3802990111723255, + "pages_per_minute": 3.713598227045682 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.4180064308681672, + "wer": 0.7073170731707317 + }, + { + "page_id": "phys_0002", + "cer": 0.1099236641221374, + "wer": 0.24778761061946902 + }, + { + "page_id": "phys_0003", + "cer": 0.09132841328413284, + "wer": 0.18579234972677597 + } + ] + } + }, + { + "eval_workflow_id": "wf-datakistler_kraeuter_1500-eval", + "label": "Workflow on data kistler_kraeuter_1500", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/kistler_kraeuter_1500.ocrd.zip", + "label": "GT workspace kistler_kraeuter_1500" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_ocr.zip", + "label": "OCR workspace for kistler_kraeuter_1500" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/kistler_kraeuter_1500_evaluation.zip", + "label": "Evaluation workspace for kistler_kraeuter_1500" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 2, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 27.745072, + "cpu_time": 42.637415, + "cer_mean": 0.10034282802482036, + "cer_median": 0.10034282802482036, + "cer_range": [ + 0.09958847736625515, + 0.10109717868338558 + ], + "cer_standard_deviation": 0.001066812932128006, + "wer": 0.33029935275080907, + "pages_per_minute": 4.325092398390604 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer": 0.09958847736625515, + "wer": 0.36893203883495146 + }, + { + "page_id": "phys_0021", + "cer": 0.10109717868338558, + "wer": 0.2916666666666667 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataluz_blitz_1784-eval", + "label": "Workflow on data luz_blitz_1784", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/luz_blitz_1784.ocrd.zip", + "label": "GT workspace luz_blitz_1784" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_ocr.zip", + "label": "OCR workspace for luz_blitz_1784" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_evaluation.zip", + "label": "Evaluation workspace for luz_blitz_1784" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1700-1800", + "publication_decade": "", + "publication_year": "18th century", + "number_of_pages": 4, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 17.701244, + "cpu_time": 20.535555, + "cer_mean": 0.030367084502750087, + "cer_median": 0.02381735856100435, + "cer_range": [ + 0.014354066985645933, + 0.05947955390334572 + ], + "cer_standard_deviation": 0.02030808283356641, + "wer": 0.08722583259487592, + "pages_per_minute": 13.558369118012271 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.05947955390334572, + "wer": 0.1794871794871795 + }, + { + "page_id": "phys_0002", + "cer": 0.014354066985645933, + "wer": 0.043478260869565216 + }, + { + "page_id": "phys_0003", + "cer": 0.02869097429766886, + "wer": 0.07924528301886792 + }, + { + "page_id": "phys_0004", + "cer": 0.01894374282433984, + "wer": 0.04669260700389105 + } + ] + } + }, + { + "eval_workflow_id": "wf-datahuebner_handbuch_1696-eval", + "label": "Workflow on data huebner_handbuch_1696", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/huebner_handbuch_1696.ocrd.zip", + "label": "GT workspace huebner_handbuch_1696" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_ocr.zip", + "label": "OCR workspace for huebner_handbuch_1696" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/huebner_handbuch_1696_evaluation.zip", + "label": "Evaluation workspace for huebner_handbuch_1696" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 28.173187000000002, + "cpu_time": 27.232793000000004, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 6.389053535192876 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_title_pages-eval", + "label": "Workflow on data reichsanzeiger_title_pages", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_title_pages.ocrd.zip", + "label": "GT workspace reichsanzeiger_title_pages" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_ocr.zip", + "label": "OCR workspace for reichsanzeiger_title_pages" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_title_pages" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 1006.2491769999999, + "cpu_time": 965.354889, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 0.2981368898053768 + }, + "by_page": [ + { + "page_id": "P_1881_115_0163", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1885_5_0054", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1887_134_0444", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1916_169_0087", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1918_267_0129", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_many_ads-eval", + "label": "Workflow on data reichsanzeiger_many_ads", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_many_ads.ocrd.zip", + "label": "GT workspace reichsanzeiger_many_ads" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_ocr.zip", + "label": "OCR workspace for reichsanzeiger_many_ads" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_many_ads" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 307.154189, + "cpu_time": 356.97703, + "cer_mean": 1.3340240550306781, + "cer_median": 0.7412443874278384, + "cer_range": [ + 0.3960932753867003, + 3.7402255639097746 + ], + "cer_standard_deviation": 1.378192503303974, + "wer": 1.8653811990174032, + "pages_per_minute": 0.9767081509671353 + }, + "by_page": [ + { + "page_id": "P_1871_155_0279", + "cer": 0.3960932753867003, + "wer": 0.49564980967917344 + }, + { + "page_id": "P_1871_65_0045", + "cer": 0.7412443874278384, + "wer": 0.947814451382694 + }, + { + "page_id": "P_1873_1_0017", + "cer": 0.5852251348300515, + "wer": 0.6788418708240535 + }, + { + "page_id": "P_1881_1_0662", + "cer": 3.7402255639097746, + "wer": 5.734011627906977 + }, + { + "page_id": "P_1883_55_0044", + "cer": 1.2073319135990264, + "wer": 1.4705882352941178 + } + ] + } + }, + { + "eval_workflow_id": "wf-datablumenbach_anatomie_1805-eval", + "label": "Workflow on data blumenbach_anatomie_1805", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/blumenbach_anatomie_1805.ocrd.zip", + "label": "GT workspace blumenbach_anatomie_1805" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_ocr.zip", + "label": "OCR workspace for blumenbach_anatomie_1805" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/blumenbach_anatomie_1805_evaluation.zip", + "label": "Evaluation workspace for blumenbach_anatomie_1805" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "segmentation_level": "word", + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_tables": true, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1800-1900", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 3, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 38.17563400000001, + "cpu_time": 59.410665, + "cer_mean": 0.08697690719872657, + "cer_median": 0.04421052631578947, + "cer_range": [ + 0.03735325506937033, + 0.17936694021101993 + ], + "cer_standard_deviation": 0.08008554296654442, + "wer": 0.2764482431149098, + "pages_per_minute": 4.715049395119409 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 0.04421052631578947, + "wer": 0.16666666666666666 + }, + { + "page_id": "phys_0002", + "cer": 0.03735325506937033, + "wer": 0.17037037037037037 + }, + { + "page_id": "phys_0003", + "cer": 0.17936694021101993, + "wer": 0.49230769230769234 + } + ] + } + }, + { + "eval_workflow_id": "wf-datareichsanzeiger_many_ads-eval", + "label": "Workflow on data reichsanzeiger_many_ads", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_many_ads.ocrd.zip", + "label": "GT workspace reichsanzeiger_many_ads" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_ocr.zip", + "label": "OCR workspace for reichsanzeiger_many_ads" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_many_ads_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_many_ads" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1820-1939", + "publication_decade": "", + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 1968.7780160000002, + "cpu_time": 1913.616593, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 0.15237878397764473 + }, + "by_page": [ + { + "page_id": "P_1871_155_0279", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1871_65_0045", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1873_1_0017", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1881_1_0662", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1883_55_0044", + "cer": 1.0, + "wer": 1.0 + } + ] + } + }, + { + "eval_workflow_id": "wf-dataweigel_gnothi02_1618-eval", + "label": "Workflow on data weigel_gnothi02_1618", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/weigel_gnothi02_1618.ocrd.zip", + "label": "GT workspace weigel_gnothi02_1618" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_ocr.zip", + "label": "OCR workspace for weigel_gnothi02_1618" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_evaluation.zip", + "label": "Evaluation workspace for weigel_gnothi02_1618" + }, + "workflow_steps": [ + { + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 + } + } + ], + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua", + "Fraktur" + ], + "publication_century": "1600-1700", + "publication_decade": "", + "publication_year": "17th century", + "number_of_pages": 4, + "layout": "simple" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 45.738777, + "cpu_time": 44.076152, + "cer_mean": 1.0, + "cer_median": 1.0, + "cer_range": [ + 1.0, + 1.0 + ], + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 5.247188834979125 + }, + "by_page": [ + { + "page_id": "phys_0001", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0002", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0003", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-databernd_lebensbeschreibung_1738-eval", - "label": "Workflow on data bernd_lebensbeschreibung_1738", + "eval_workflow_id": "wf-dataestor_rechtsgelehrsamkeit02_1758-eval", + "label": "Workflow on data estor_rechtsgelehrsamkeit02_1758", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/selected_pages_ocr.txt", + "label": "OCR Workflow selected_pages_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/bernd_lebensbeschreibung_1738.ocrd.zip", - "label": "GT workspace bernd_lebensbeschreibung_1738" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/estor_rechtsgelehrsamkeit02_1758.ocrd.zip", + "label": "GT workspace estor_rechtsgelehrsamkeit02_1758" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_ocr.zip", - "label": "OCR workspace for bernd_lebensbeschreibung_1738" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_ocr.zip", + "label": "OCR workspace for estor_rechtsgelehrsamkeit02_1758" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/bernd_lebensbeschreibung_1738_evaluation.zip", - "label": "Evaluation workspace for bernd_lebensbeschreibung_1738" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_evaluation.zip", + "label": "Evaluation workspace for estor_rechtsgelehrsamkeit02_1758" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, - "shrink_polygons": false, - "block_polygons": false, - "find_staves": false, - "sparse_text": false, - "raw_lines": false, - "char_whitelist": "", - "char_blacklist": "", - "char_unblacklist": "", - "tesseract_parameters": {}, - "xpath_parameters": {}, - "xpath_model": {}, - "auto_model": false, - "oem": "DEFAULT" + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-skimage-binarize", + "params": { + "method": "li", + "level-of-operation": "page", + "dpi": 0, + "window_size": 301, + "k": 0.34 + } + }, + { + "id": "ocrd-skimage-denoise", + "params": { + "level-of-operation": "page", + "dpi": 0, + "protect": 0.0, + "maxsize": 1.0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-cis-ocropy-segment", + "params": { + "dpi": 0, + "level-of-operation": "region", + "maxcolseps": 20, + "maxseps": 20, + "maximages": 10, + "csminheight": 4, + "hlminwidth": 10, + "gap_height": 0.01, + "gap_width": 1.5, + "overwrite_order": true, + "overwrite_separators": true, + "overwrite_regions": true, + "overwrite_lines": true, + "spread": 2.4 + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-calamari-recognize", + "params": { + "checkpoint_dir": "qurator-gt4histocr-1.0", + "voter": "confidence_voter_default_ctc", + "textequiv_level": "line", + "glyph_conf_cutoff": 0.001 } } ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", + "workflow_model": "qurator-gt4histocr-1.0", + "eval_tool": "ocrd-calamari-recognize v1.0.5 (calamari 1.0.5, tensorflow 2.12.0)", "document_metadata": { "data_properties": { "fonts": [ @@ -1825,47 +10947,52 @@ "publication_century": "1700-1800", "publication_decade": "", "publication_year": "18th century", - "number_of_pages": 3, + "number_of_pages": 4, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 8.577653, - "cpu_time": 10.790615, - "cer_mean": 0.1790109644215708, - "cer_median": 0.05504587155963303, + "wall_time": 28.053300999999998, + "cpu_time": 26.997501, + "cer_mean": 1.0, + "cer_median": 1.0, "cer_range": [ - 0.008409785932721712, - 0.4735772357723577 + 1.0, + 1.0 ], - "cer_standard_deviation": 0.2561653709691831, - "wer": 0.2391410103864555, - "pages_per_minute": 20.984761216150854 + "cer_standard_deviation": 0.0, + "wer": 1.0, + "pages_per_minute": 8.555142940219406 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.4735772357723577, - "wer": 0.5882352941176471 + "cer": 1.0, + "wer": 1.0 }, { "page_id": "phys_0002", - "cer": 0.05504587155963303, - "wer": 0.09821428571428571 + "cer": 1.0, + "wer": 1.0 }, { "page_id": "phys_0003", - "cer": 0.008409785932721712, - "wer": 0.030973451327433628 + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "phys_0004", + "cer": 1.0, + "wer": 1.0 } ] } }, { - "eval_workflow_id": "wf-dataweigel_gnothi02_1618-eval", - "label": "Workflow on data weigel_gnothi02_1618", + "eval_workflow_id": "wf-databallenstedt_delatio_1777-eval", + "label": "Workflow on data ballenstedt_delatio_1777", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1876,16 +11003,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/weigel_gnothi02_1618.ocrd.zip", - "label": "GT workspace weigel_gnothi02_1618" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/ballenstedt_delatio_1777.ocrd.zip", + "label": "GT workspace ballenstedt_delatio_1777" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_ocr.zip", - "label": "OCR workspace for weigel_gnothi02_1618" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_ocr.zip", + "label": "OCR workspace for ballenstedt_delatio_1777" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/weigel_gnothi02_1618_evaluation.zip", - "label": "Evaluation workspace for weigel_gnothi02_1618" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/ballenstedt_delatio_1777_evaluation.zip", + "label": "Evaluation workspace for ballenstedt_delatio_1777" }, "workflow_steps": [ { @@ -1923,55 +11050,50 @@ "Antiqua", "Fraktur" ], - "publication_century": "1600-1700", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "17th century", - "number_of_pages": 4, + "publication_year": "18th century", + "number_of_pages": 3, "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 27.856097, - "cpu_time": 31.285033, - "cer_mean": 0.11399763421865425, - "cer_median": 0.09987627660053089, + "wall_time": 18.330509, + "cpu_time": 20.979165, + "cer_mean": 0.06438361522903834, + "cer_median": 0.03969957081545064, "cer_range": [ - 0.07531106745252128, - 0.18092691622103388 + 0.021764032073310423, + 0.13168724279835392 ], - "cer_standard_deviation": 0.047514032403792344, - "wer": 0.19377061794956923, - "pages_per_minute": 8.615708079994121 + "cer_standard_deviation": 0.058972490200809365, + "wer": 0.16906902212925057, + "pages_per_minute": 9.819694586767886 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.18092691622103388, - "wer": 0.3282051282051282 - }, - { - "page_id": "phys_0002", - "cer": 0.07531106745252128, - "wer": 0.15019762845849802 + "page_id": "phys_00003", + "cer": 0.021764032073310423, + "wer": 0.12236286919831224 }, { - "page_id": "phys_0003", - "cer": 0.11397849462365592, - "wer": 0.15934065934065933 + "page_id": "phys_00005", + "cer": 0.13168724279835392, + "wer": 0.2603305785123967 }, { - "page_id": "phys_0004", - "cer": 0.08577405857740586, - "wer": 0.13733905579399142 + "page_id": "phys_00010", + "cer": 0.03969957081545064, + "wer": 0.1245136186770428 } ] } }, { - "eval_workflow_id": "wf-datalessing_menschengeschlecht_1780-eval", - "label": "Workflow on data lessing_menschengeschlecht_1780", + "eval_workflow_id": "wf-datareichsanzeiger_title_pages-eval", + "label": "Workflow on data reichsanzeiger_title_pages", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -1982,16 +11104,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/lessing_menschengeschlecht_1780.ocrd.zip", - "label": "GT workspace lessing_menschengeschlecht_1780" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_title_pages.ocrd.zip", + "label": "GT workspace reichsanzeiger_title_pages" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_ocr.zip", - "label": "OCR workspace for lessing_menschengeschlecht_1780" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_ocr.zip", + "label": "OCR workspace for reichsanzeiger_title_pages" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/lessing_menschengeschlecht_1780_evaluation.zip", - "label": "Evaluation workspace for lessing_menschengeschlecht_1780" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_title_pages_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_title_pages" }, "workflow_steps": [ { @@ -2026,78 +11148,177 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Fraktur" + "Antiqua" ], - "publication_century": "1700-1800", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 1, - "layout": "simple" + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 2.026755, - "cpu_time": 2.56115, - "cer_mean": 0.02493765586034913, - "cer_median": 0.02493765586034913, + "wall_time": 365.603086, + "cpu_time": 453.722946, + "cer_mean": 0.5074594248191785, + "cer_median": 0.3389393598117977, "cer_range": [ - 0.02493765586034913, - 0.02493765586034913 + 0.09154875911296263, + 1.0 ], - "cer_standard_deviation": null, - "wer": 0.09836065573770492, - "pages_per_minute": 29.60397285315689 + "cer_standard_deviation": 0.36783069273412317, + "wer": 0.630009265947987, + "pages_per_minute": 0.8205620014925147 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.02493765586034913, - "wer": 0.09836065573770492 + "page_id": "P_1881_115_0163", + "cer": 0.3377672361293631, + "wer": 0.4791666666666667 + }, + { + "page_id": "P_1885_5_0054", + "cer": 0.09154875911296263, + "wer": 0.20966135458167331 + }, + { + "page_id": "P_1887_134_0444", + "cer": 1.0, + "wer": 1.0 + }, + { + "page_id": "P_1916_169_0087", + "cer": 0.3389393598117977, + "wer": 0.5072414380644062 + }, + { + "page_id": "P_1918_267_0129", + "cer": 0.769041769041769, + "wer": 0.953976870427189 } ] } }, { - "eval_workflow_id": "wf-dataalberti_pictura_1540-eval", - "label": "Workflow on data alberti_pictura_1540", + "eval_workflow_id": "wf-dataeuler_rechenkunst01_1738-eval", + "label": "Workflow on data euler_rechenkunst01_1738", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/alberti_pictura_1540.ocrd.zip", - "label": "GT workspace alberti_pictura_1540" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/euler_rechenkunst01_1738.ocrd.zip", + "label": "GT workspace euler_rechenkunst01_1738" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_ocr.zip", - "label": "OCR workspace for alberti_pictura_1540" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_ocr.zip", + "label": "OCR workspace for euler_rechenkunst01_1738" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_evaluation.zip", - "label": "Evaluation workspace for alberti_pictura_1540" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/euler_rechenkunst01_1738_evaluation.zip", + "label": "Evaluation workspace for euler_rechenkunst01_1738" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", + "params": { + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -2120,50 +11341,65 @@ "Antiqua", "Fraktur" ], - "publication_century": "1500-1600", + "publication_century": "1700-1800", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 3, + "publication_year": "18th century", + "number_of_pages": 6, "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.85773, - "cpu_time": 9.961092, - "cer_mean": 0.10240852523716282, - "cer_median": 0.10536980749746708, + "wall_time": 69.019748, + "cpu_time": 108.522222, + "cer_mean": 0.1500034071140843, + "cer_median": 0.12202020202020203, "cer_range": [ - 0.07124352331606218, - 0.1306122448979592 + 0.03902862098872507, + 0.39473684210526316 ], - "cer_standard_deviation": 0.02979493530847308, - "wer": 0.23466068901129858, - "pages_per_minute": 22.907379103125205 + "cer_standard_deviation": 0.12934283380557174, + "wer": 0.27891577144281926, + "pages_per_minute": 5.215898499078843 }, "by_page": [ { - "page_id": "phys_0007", - "cer": 0.07124352331606218, - "wer": 0.2231404958677686 + "page_id": "phys_0001", + "cer": 0.03902862098872507, + "wer": 0.0872093023255814 }, { - "page_id": "phys_0008", - "cer": 0.10536980749746708, - "wer": 0.2484472049689441 + "page_id": "phys_0002", + "cer": 0.39473684210526316, + "wer": 0.6402439024390244 }, { - "page_id": "phys_0009", - "cer": 0.1306122448979592, - "wer": 0.2323943661971831 + "page_id": "phys_0003", + "cer": 0.16756341275941583, + "wer": 0.3592233009708738 + }, + { + "page_id": "phys_0004", + "cer": 0.13737373737373737, + "wer": 0.2 + }, + { + "page_id": "phys_0005", + "cer": 0.10666666666666667, + "wer": 0.21893491124260356 + }, + { + "page_id": "phys_0006", + "cer": 0.05465116279069768, + "wer": 0.1678832116788321 } ] } }, { - "eval_workflow_id": "wf-dataheyden_paedono_1548-eval", - "label": "Workflow on data heyden_paedono_1548", + "eval_workflow_id": "wf-dataalberti_pictura_1540-eval", + "label": "Workflow on data alberti_pictura_1540", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -2174,16 +11410,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/heyden_paedono_1548.ocrd.zip", - "label": "GT workspace heyden_paedono_1548" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/alberti_pictura_1540.ocrd.zip", + "label": "GT workspace alberti_pictura_1540" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_ocr.zip", - "label": "OCR workspace for heyden_paedono_1548" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_ocr.zip", + "label": "OCR workspace for alberti_pictura_1540" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/heyden_paedono_1548_evaluation.zip", - "label": "Evaluation workspace for heyden_paedono_1548" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/alberti_pictura_1540_evaluation.zip", + "label": "Evaluation workspace for alberti_pictura_1540" }, "workflow_steps": [ { @@ -2225,46 +11461,46 @@ "publication_decade": "", "publication_year": "16th century", "number_of_pages": 3, - "layout": "simple" + "layout": "complex" } } }, "evaluation_results": { "document_wide": { - "wall_time": 8.303854, - "cpu_time": 10.292532, - "cer_mean": 0.07452119312897007, - "cer_median": 0.0629800307219662, + "wall_time": 7.080852, + "cpu_time": 9.399835, + "cer_mean": 0.10240852523716282, + "cer_median": 0.10536980749746708, "cer_range": [ - 0.037037037037037035, - 0.12354651162790697 + 0.07124352331606218, + 0.1306122448979592 ], - "cer_standard_deviation": 0.044394494261965886, - "wer": 0.22683890077340793, - "pages_per_minute": 21.676681695029803 + "cer_standard_deviation": 0.02979493530847308, + "wer": 0.23466068901129858, + "pages_per_minute": 25.420669716017226 }, "by_page": [ { "page_id": "phys_0007", - "cer": 0.0629800307219662, - "wer": 0.21359223300970873 + "cer": 0.07124352331606218, + "wer": 0.2231404958677686 }, { - "page_id": "phys_0013", - "cer": 0.12354651162790697, - "wer": 0.33980582524271846 + "page_id": "phys_0008", + "cer": 0.10536980749746708, + "wer": 0.2484472049689441 }, { - "page_id": "phys_0014", - "cer": 0.037037037037037035, - "wer": 0.1271186440677966 + "page_id": "phys_0009", + "cer": 0.1306122448979592, + "wer": 0.2323943661971831 } ] } }, { - "eval_workflow_id": "wf-dataluz_blitz_1784-eval", - "label": "Workflow on data luz_blitz_1784", + "eval_workflow_id": "wf-datann_besuch_1780-eval", + "label": "Workflow on data nn_besuch_1780", "metadata": { "ocr_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", @@ -2275,16 +11511,16 @@ "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/luz_blitz_1784.ocrd.zip", - "label": "GT workspace luz_blitz_1784" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/nn_besuch_1780.ocrd.zip", + "label": "GT workspace nn_besuch_1780" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_ocr.zip", - "label": "OCR workspace for luz_blitz_1784" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_ocr.zip", + "label": "OCR workspace for nn_besuch_1780" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/luz_blitz_1784_evaluation.zip", - "label": "Evaluation workspace for luz_blitz_1784" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/nn_besuch_1780_evaluation.zip", + "label": "Evaluation workspace for nn_besuch_1780" }, "workflow_steps": [ { @@ -2332,82 +11568,116 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 19.577649, - "cpu_time": 22.415525, - "cer_mean": 0.030367084502750087, - "cer_median": 0.02381735856100435, + "wall_time": 12.750288, + "cpu_time": 15.089726, + "cer_mean": 0.06315407734401027, + "cer_median": 0.026726016076928857, "cer_range": [ - 0.014354066985645933, - 0.05947955390334572 + 0.01878453038674033, + 0.18037974683544303 ], - "cer_standard_deviation": 0.02030808283356641, - "wer": 0.08722583259487592, - "pages_per_minute": 12.258877457655922 + "cer_standard_deviation": 0.07825196427362012, + "wer": 0.11662846322503488, + "pages_per_minute": 18.82310423105737 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.05947955390334572, - "wer": 0.1794871794871795 + "page_id": "phys_00001", + "cer": 0.18037974683544303, + "wer": 0.1896551724137931 }, { - "page_id": "phys_0002", - "cer": 0.014354066985645933, - "wer": 0.043478260869565216 + "page_id": "phys_00002", + "cer": 0.01878453038674033, + "wer": 0.08670520231213873 }, { - "page_id": "phys_0003", - "cer": 0.02869097429766886, - "wer": 0.07924528301886792 + "page_id": "phys_00003", + "cer": 0.02505446623093682, + "wer": 0.08670520231213873 }, { - "page_id": "phys_0004", - "cer": 0.01894374282433984, - "wer": 0.04669260700389105 + "page_id": "phys_00004", + "cer": 0.028397565922920892, + "wer": 0.10344827586206896 } ] } }, { - "eval_workflow_id": "wf-dataestor_rechtsgelehrsamkeit02_1758-eval", - "label": "Workflow on data estor_rechtsgelehrsamkeit02_1758", + "eval_workflow_id": "wf-datareichsanzeiger_tables-eval", + "label": "Workflow on data reichsanzeiger_tables", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/estor_rechtsgelehrsamkeit02_1758.ocrd.zip", - "label": "GT workspace estor_rechtsgelehrsamkeit02_1758" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_tables.ocrd.zip", + "label": "GT workspace reichsanzeiger_tables" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_ocr.zip", - "label": "OCR workspace for estor_rechtsgelehrsamkeit02_1758" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_ocr.zip", + "label": "OCR workspace for reichsanzeiger_tables" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/estor_rechtsgelehrsamkeit02_1758_evaluation.zip", - "label": "Evaluation workspace for estor_rechtsgelehrsamkeit02_1758" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_tables_evaluation.zip", + "label": "Evaluation workspace for reichsanzeiger_tables" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, - "model": "Fraktur_GT4HistOCR", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, "dpi": 0, - "padding": 0, - "overwrite_segments": false, - "overwrite_text": true, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-tesserocr-crop", + "params": { + "dpi": 0, + "padding": 4 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, "raw_lines": false, "char_whitelist": "", "char_blacklist": "", @@ -2416,102 +11686,31 @@ "xpath_parameters": {}, "xpath_model": {}, "auto_model": false, - "oem": "DEFAULT" - } - } - ], - "workflow_model": "Fraktur_GT4HistOCR", - "eval_tool": "ocrd-dinglehopper vNone", - "document_metadata": { - "data_properties": { - "fonts": [ - "Antiqua", - "Fraktur" - ], - "publication_century": "1700-1800", - "publication_decade": "", - "publication_year": "18th century", - "number_of_pages": 4, - "layout": "complex" - } - } - }, - "evaluation_results": { - "document_wide": { - "wall_time": 19.327455, - "cpu_time": 22.561449, - "cer_mean": 0.11589408928020027, - "cer_median": 0.10084584323499293, - "cer_range": [ - 0.05331088664421998, - 0.2085737840065952 - ], - "cer_standard_deviation": 0.07259145757108061, - "wer": 0.20102650242627845, - "pages_per_minute": 12.417568686617043 - }, - "by_page": [ - { - "page_id": "phys_0001", - "cer": 0.06310013717421124, - "wer": 0.11403508771929824 - }, - { - "page_id": "phys_0002", - "cer": 0.13859154929577464, - "wer": 0.22340425531914893 + "oem": "DEFAULT" + } }, { - "page_id": "phys_0003", - "cer": 0.05331088664421998, - "wer": 0.13333333333333333 + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } }, - { - "page_id": "phys_0004", - "cer": 0.2085737840065952, - "wer": 0.3333333333333333 - } - ] - } - }, - { - "eval_workflow_id": "wf-datatrota_mordtbrenner_1540-eval", - "label": "Workflow on data trota_mordtbrenner_1540", - "metadata": { - "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" - }, - "eval_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", - "label": "Evaluation Workflow dinglehopper_eval" - }, - "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/trota_mordtbrenner_1540.ocrd.zip", - "label": "GT workspace trota_mordtbrenner_1540" - }, - "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_ocr.zip", - "label": "OCR workspace for trota_mordtbrenner_1540" - }, - "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/trota_mordtbrenner_1540_evaluation.zip", - "label": "Evaluation workspace for trota_mordtbrenner_1540" - }, - "workflow_steps": [ { "id": "ocrd-tesserocr-recognize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", - "find_tables": true, + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -2531,41 +11730,55 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua", - "Fraktur" + "Antiqua" ], - "publication_century": "1500-1600", + "publication_century": "1820-1939", "publication_decade": "", - "publication_year": "16th century", - "number_of_pages": 2, - "layout": "simple" + "publication_year": "19th century", + "number_of_pages": 5, + "layout": "reichsanzeiger-gt" } } }, "evaluation_results": { "document_wide": { - "wall_time": 7.192959, - "cpu_time": 8.533417, - "cer_mean": 0.043071000855431994, - "cer_median": 0.043071000855431994, + "wall_time": 4081.6259200000004, + "cpu_time": 4975.624208, + "cer_mean": 1.3864721075704818, + "cer_median": 0.9776101627630012, "cer_range": [ - 0.014285714285714285, - 0.0718562874251497 + 0.9091116519622756, + 2.105984787830264 ], - "cer_standard_deviation": 0.04070854266369089, - "wer": 0.10714905431199029, - "pages_per_minute": 16.682981232063188 + "cer_standard_deviation": 0.6085337634117919, + "wer": 1.6178586644645931, + "pages_per_minute": 0.0735001212458 }, "by_page": [ { - "page_id": "phys_0001", - "cer": 0.0718562874251497, - "wer": 0.19469026548672566 + "page_id": "P_1877_7_0059", + "cer": 0.9432257390577666, + "wer": 0.9872849663425579 }, { - "page_id": "phys_0002", - "cer": 0.014285714285714285, - "wer": 0.0196078431372549 + "page_id": "P_1883_55_0044", + "cer": 0.9091116519622756, + "wer": 0.9841819080573406 + }, + { + "page_id": "P_1929_250_0019", + "cer": 0.9776101627630012, + "wer": 0.9989969909729187 + }, + { + "page_id": "P_1932_300_0488", + "cer": 2.105984787830264, + "wer": 2.814037626628075 + }, + { + "page_id": "P_1936_123_0292", + "cer": 1.9964281962391008, + "wer": 2.304791830322074 } ] } @@ -2575,8 +11788,8 @@ "label": "Workflow on data justi_abhandlung01_1758", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", @@ -2596,18 +11809,98 @@ }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -2640,80 +11933,160 @@ }, "evaluation_results": { "document_wide": { - "wall_time": 13.437096, - "cpu_time": 16.64254, - "cer_mean": 0.12942563348778183, - "cer_median": 0.13278777025964683, + "wall_time": 51.503143, + "cpu_time": 81.95352600000001, + "cer_mean": 0.06272817755704542, + "cer_median": 0.04686327354908531, "cer_range": [ - 0.09893550407013149, + 0.0039946737683089215, 0.15319148936170213 ], - "cer_standard_deviation": 0.022501323128153037, - "wer": 0.19483806092245076, - "pages_per_minute": 17.861002109384348 + "cer_standard_deviation": 0.07133216362479361, + "wer": 0.0881207064150021, + "pages_per_minute": 4.6599097845348965 }, "by_page": [ { "page_id": "phys_0001", - "cer": 0.13375130616509928, - "wer": 0.2 + "cer": 0.0073145245559038665, + "wer": 0.02857142857142857 }, { "page_id": "phys_0002", "cer": 0.15319148936170213, - "wer": 0.22916666666666666 + "wer": 0.20833333333333334 }, { "page_id": "phys_0003", - "cer": 0.09893550407013149, - "wer": 0.12992125984251968 + "cer": 0.08641202254226675, + "wer": 0.10236220472440945 }, { "page_id": "phys_0004", - "cer": 0.1318242343541944, - "wer": 0.22026431718061673 + "cer": 0.0039946737683089215, + "wer": 0.013215859030837005 } ] } }, { - "eval_workflow_id": "wf-datareichsanzeiger_random-eval", - "label": "Workflow on data reichsanzeiger_random", + "eval_workflow_id": "wf-dataarnimb_goethe03_1835-eval", + "label": "Workflow on data arnimb_goethe03_1835", "metadata": { "ocr_workflow": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", - "label": "OCR Workflow minimal_ocr" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/slower_processors_ocr.txt", + "label": "OCR Workflow slower_processors_ocr" }, "eval_workflow": { "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", "label": "Evaluation Workflow dinglehopper_eval" }, "gt_workspace": { - "@id": "https://github.com/OCR-D/quiver-data/blob/main/reichsanzeiger_random.ocrd.zip", - "label": "GT workspace reichsanzeiger_random" + "@id": "https://github.com/OCR-D/quiver-data/blob/main/arnimb_goethe03_1835.ocrd.zip", + "label": "GT workspace arnimb_goethe03_1835" }, "ocr_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_ocr.zip", - "label": "OCR workspace for reichsanzeiger_random" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_ocr.zip", + "label": "OCR workspace for arnimb_goethe03_1835" }, "eval_workspace": { - "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/reichsanzeiger_random_evaluation.zip", - "label": "Evaluation workspace for reichsanzeiger_random" + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/arnimb_goethe03_1835_evaluation.zip", + "label": "Evaluation workspace for arnimb_goethe03_1835" }, "workflow_steps": [ { - "id": "ocrd-tesserocr-recognize", + "id": "ocrd-cis-ocropy-binarize", "params": { - "segmentation_level": "region", - "textequiv_level": "word", + "method": "ocropy", + "threshold": 0.5, + "grayscale": false, + "maxskew": 0.0, + "noise_maxsize": 0, + "dpi": 0, + "level-of-operation": "page" + } + }, + { + "id": "ocrd-anybaseocr-crop", + "params": { + "dpi": 0, + "rulerRatioMax": 50.0, + "rulerRatioMin": 3.0, + "rulerAreaMax": 0.3, + "rulerAreaMin": 0.01, + "rulerWidthMax": 0.95, + "columnAreaMin": 0.05, + "columnSepWidthMax": 0.04, + "marginTop": 0.25, + "marginBottom": 0.75, + "marginLeft": 0.3, + "marginRight": 0.7, + "padding": 10 + } + }, + { + "id": "ocrd-cis-ocropy-denoise", + "params": { + "level-of-operation": "page", + "noise_maxsize": 3.0, + "dpi": 0 + } + }, + { + "id": "ocrd-tesserocr-deskew", + "params": { + "operation_level": "page", + "dpi": 0, + "min_orientation_confidence": 1.5 + } + }, + { + "id": "ocrd-tesserocr-segment", + "params": { + "dpi": 0, + "padding": 4, + "shrink_polygons": false, + "block_polygons": false, "find_tables": true, + "find_staves": false, + "sparse_text": false, + "overwrite_segments": true, + "segmentation_level": "region", + "textequiv_level": "none", + "overwrite_text": true, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + }, + { + "id": "ocrd-cis-ocropy-dewarp", + "params": { + "dpi": 0, + "range": 4.0, + "smoothness": 1.0, + "max_neighbour": 0.05 + } + }, + { + "id": "ocrd-tesserocr-recognize", + "params": { + "textequiv_level": "glyph", + "overwrite_segments": true, "model": "Fraktur_GT4HistOCR", "dpi": 0, "padding": 0, - "overwrite_segments": false, + "segmentation_level": "word", "overwrite_text": true, "shrink_polygons": false, "block_polygons": false, + "find_tables": true, "find_staves": false, "sparse_text": false, "raw_lines": false, @@ -2733,60 +12106,35 @@ "document_metadata": { "data_properties": { "fonts": [ - "Antiqua" + "Fraktur" ], - "publication_century": "1820-1939", + "publication_century": "1800-1900", "publication_decade": "", "publication_year": "19th century", - "number_of_pages": 6, - "layout": "reichsanzeiger-gt" + "number_of_pages": 1, + "layout": "simple" } } }, "evaluation_results": { "document_wide": { - "wall_time": 636.322401, - "cpu_time": 852.529652, - "cer_mean": 1.1094634128044334, - "cer_median": 0.8993673021608106, + "wall_time": 11.160878999999998, + "cpu_time": 17.708654999999997, + "cer_mean": 0.0056657223796034, + "cer_median": 0.0056657223796034, "cer_range": [ - 0.30019453260980855, - 1.9778823058446757 + 0.0056657223796034, + 0.0056657223796034 ], - "cer_standard_deviation": 0.645494993792005, - "wer": 1.3304151477151094, - "pages_per_minute": 0.565750945486516 + "cer_standard_deviation": null, + "wer": 0.031746031746031744, + "pages_per_minute": 5.3759206600125315 }, "by_page": [ { - "page_id": "P_1879_45_0344", - "cer": 0.9843462873477166, - "wer": 1.1759884281581485 - }, - { - "page_id": "P_1885_5_0055", - "cer": 0.30019453260980855, - "wer": 0.40127817019845274 - }, - { - "page_id": "P_1889_1_0018", - "cer": 0.7868184930272782, - "wer": 0.9704142011834319 - }, - { - "page_id": "P_1891_33_0452", - "cer": 0.8143883169739047, - "wer": 1.112810707456979 - }, - { - "page_id": "P_1932_300_0488", - "cer": 1.9778823058446757, - "wer": 2.349493487698987 - }, - { - "page_id": "P_1936_123_0292", - "cer": 1.7931505410232167, - "wer": 1.9725058915946583 + "page_id": "phys_0001", + "cer": 0.0056657223796034, + "wer": 0.031746031746031744 } ] } diff --git a/src/benchmark_extraction.py b/src/benchmark_extraction.py index 39b4b2144d236dec217160b415ca978ff35661d0..1c030c87f769d43f8fb29b2a745cfce5fc8fe7f4 100644 --- a/src/benchmark_extraction.py +++ b/src/benchmark_extraction.py @@ -7,7 +7,6 @@ import xml.etree.ElementTree as ET from os import listdir, scandir from statistics import stdev, median from typing import Any, Dict, List, Union - from .constants import METS, RESULTS, QUIVER_MAIN, OCRD @@ -198,8 +197,11 @@ def get_nextflow_time(workspace_path: str, time_type: str) -> float: for log in logs: with open(highest_workspace_dir + '/' + log, 'r', encoding='utf-8') as l: log_file = l.read() - no_sec_s = re.search(rf'([0-9]+?\.[0-9]+?)s \({time_type}\)', log_file).group(1) - time_per_workflow_step.append(float(no_sec_s)) + try: + no_sec_s = re.search(rf'([0-9]+?\.[0-9]+?)s \({time_type}\)', log_file).group(1) + time_per_workflow_step.append(float(no_sec_s)) + except AttributeError: + print(f'No wall time found in {highest_workspace_dir}/{log}. Skipping.') return sum(time_per_workflow_step) @@ -207,7 +209,11 @@ def get_pages_per_minute(workspace_path: str) -> float: duration = get_nextflow_time(workspace_path, 'wall') no_pages = get_no_of_pages(workspace_path) - return no_pages / (duration / 60) + try: + return no_pages / (duration / 60) + except ZeroDivisionError: + print('ERROR: Division by zero.') + return None def get_mean_cer(workspace_path: str, gt_type: str) -> float: diff --git a/workflows/execute_workflows.sh b/workflows/execute_workflows.sh index 3ce0d9bf5a8d5f87838c03564eb6c45ceefa7803..5a90275622ce44f0a5c38c009840a5eba912bec0 100755 --- a/workflows/execute_workflows.sh +++ b/workflows/execute_workflows.sh @@ -69,28 +69,30 @@ create_wf_specific_workspaces() { execute_wfs_and_extract_benchmarks() { # for all data sets… - for WS_DIR in "$WORKSPACE_DIR"/* + for WS_DIR in "$WORKSPACE_DIR"/*/ do - DATA_DIR="$WS_DIR"/data - DIR_NAME=$(basename "$WS_DIR") - INNER_DIR=$(ls "$DATA_DIR"/) - - if ! grep -q "OCR-D-OCR" "$WS_DIR/data/$INNER_DIR/mets.xml" ; then - echo "Switching to $WS_DIR." - - run "$DATA_DIR"/*/*ocr.txt.nf "$DIR_NAME" "$WS_DIR" - run "$DATA_DIR"/*/*eval.txt.nf "$DIR_NAME" "$WS_DIR" - - # create a result JSON according to the specs - echo "Get Benchmark JSON …" - WORKFLOW=$(basename -s .txt.nf "$DATA_DIR"/*/*ocr.txt.nf) - quiver benchmarks-extraction "$DATA_DIR"/* "$WORKFLOW" - echo "Done." - - # move data to results dir - mv "$DATA_DIR"/*/*result.json "$RESULTS_DIR" - else - echo "$WS_DIR has already been processed." + if [ "$WS_DIR" != "/app/workflows/workspaces/work/" ]; then + DATA_DIR="$WS_DIR"/data + DIR_NAME=$(basename "$WS_DIR") + INNER_DIR=$(ls "$DATA_DIR"/) + + if ! grep -q "OCR-D-OCR" "$WS_DIR/data/$INNER_DIR/mets.xml"; then + echo "Switching to $WS_DIR." + + run "$DATA_DIR"/*/*ocr.txt.nf "$DIR_NAME" + run "$DATA_DIR"/*/*eval.txt.nf "$DIR_NAME" + + # create a result JSON according to the specs + echo "Get Benchmark JSON …" + WORKFLOW=$(basename -s .txt.nf "$DATA_DIR"/*/*ocr.txt.nf) + quiver benchmarks-extraction "$WS_DIR"/data/* "$WORKFLOW" + echo "Done." + + # move data to results dir + mv "$DATA_DIR"/*/*result.json "$RESULTS_DIR" + else + echo "$WS_DIR has already been processed." + fi fi done cd "$ROOT" || exit @@ -120,20 +122,22 @@ rename_and_move_nextflow_result() { run() { # $1: $WORKFLOW # $2: $DIR_NAME - # $3: $WS_DIR nextflow run "$1" -with-weblog http://127.0.0.1:8000/nextflow/ --mets_path "/app/workflows/workspaces/$2/data/*/mets.xml" rename_and_move_nextflow_result "$1" "$2" - save_workspaces "$3"/data "$2" "$1" + save_workspaces "$1" "$2" } save_workspaces() { # $1: $WORKFLOW # $2: $DIR_NAME - # $3: $WS_DIR - echo "Zipping workspace $3" - ocrd -l ERROR zip bag -d "$DIR_NAME"/data/* -i "$DIR_NAME"/data/* "$DIR_NAME" - WORKFLOW_NAME=$(basename -s .txt.nf "$1") - mv "$WORKSPACE_DIR"/"$2".zip "$RESULTS_DIR"/"$2"_"$WORKFLOW_NAME".zip + echo "Zipping workspace $2" + DATA_DIR="$2/data/" + if basename -s .txt.nf "$1" | grep "eval"; then + WORKFLOW_NAME=$(basename -s .txt.nf "$1") + ocrd -l ERROR zip bag -d "$DATA_DIR"/* -i "$DATA_DIR"/* "$RESULTS_DIR"/"$2"_"$WORKFLOW_NAME".zip + else + ocrd -l ERROR zip bag -d "$DATA_DIR"/* -i "$DATA_DIR"/* "$RESULTS_DIR"/"$2".zip + fi } summarize_to_data_json() { diff --git a/workflows/ocrd_workflows/selected_pages_ocr.txt b/workflows/ocrd_workflows/selected_pages_ocr.txt index a6c3c7901b4858b403bf6f95fa328d1ae6c46d5b..864a00e85b3635ca22f0ac27389b1e3fe5d03443 100644 --- a/workflows/ocrd_workflows/selected_pages_ocr.txt +++ b/workflows/ocrd_workflows/selected_pages_ocr.txt @@ -1,6 +1,6 @@ ocrd process \ "cis-ocropy-binarize -I OCR-D-IMG -O OCR-D-BIN" \ - "anybaseocr-crop -I OCR-D-BIN -O OCR-D-CROP" \ + "tesserocr-crop -I OCR-D-BIN -O OCR-D-CROP" \ "skimage-binarize -I OCR-D-CROP -O OCR-D-BIN2 -P method li" \ "skimage-denoise -I OCR-D-BIN2 -O OCR-D-BIN-DENOISE -P level-of-operation page" \ "tesserocr-deskew -I OCR-D-BIN-DENOISE -O OCR-D-BIN-DENOISE-DESKEW -P operation_level page" \ diff --git a/workflows/ocrd_workflows/slower_processors_ocr.txt b/workflows/ocrd_workflows/slower_processors_ocr.txt index 299ef9b03ee20ce93a02bba00a7c7f2b7b36946c..5cabe322e859ef7984f0296d3b949de44df565df 100644 --- a/workflows/ocrd_workflows/slower_processors_ocr.txt +++ b/workflows/ocrd_workflows/slower_processors_ocr.txt @@ -1,6 +1,6 @@ ocrd process \ "cis-ocropy-binarize -I OCR-D-IMG -O OCR-D-BIN" \ - "anybaseocr-crop -I OCR-D-BIN -O OCR-D-CROP" \ + "tesserocr-crop -I OCR-D-BIN -O OCR-D-CROP" \ "cis-ocropy-denoise -I OCR-D-CROP -O OCR-D-BIN-DENOISE -P level-of-operation page" \ "tesserocr-deskew -I OCR-D-BIN-DENOISE -O OCR-D-BIN-DENOISE-DESKEW -P operation_level page" \ "tesserocr-segment -I OCR-D-BIN-DENOISE-DESKEW -O OCR-D-SEG" \