diff --git a/convert-anystyle-data/tei-to-biblstruct-gs.ipynb b/convert-anystyle-data/tei-to-biblstruct-gs.ipynb index e7545cdd053ccdaf6934b87d0013cc0e8a7408f1..d27909e7bbd864b8fdf42bc34fbd5e5bc63e7909 100644 --- a/convert-anystyle-data/tei-to-biblstruct-gs.ipynb +++ b/convert-anystyle-data/tei-to-biblstruct-gs.ipynb @@ -95,12 +95,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "outputs": [ { "data": { "text/plain": "<IPython.core.display.Markdown object>", - "text/markdown": "### Processing 10.1111_1467-6478.00057\n - [TEI/bibl](tei-bibl-corrected/10.1111_1467-6478.00057.xml)\n - [TEI/biblStruct](tei-biblStruct/10.1111_1467-6478.00057.biblstruct.xml)" + "text/markdown": "### Processing 10.1111_1467-6478.00057\nFiles: [TEI/bibl](tei-bibl-corrected/10.1111_1467-6478.00057.xml) | [TEI/biblStruct](tei-biblStruct/10.1111_1467-6478.00057.biblstruct.xml) | [TEI/biblStruct Gold Standard](tei-biblStruct-gold/10.1111_1467-6478.00057.xml)" }, "metadata": {}, "output_type": "display_data" @@ -116,7 +116,7 @@ { "data": { "text/plain": "<IPython.core.display.Markdown object>", - "text/markdown": "### Processing 10.1111_1467-6478.00080\n - [TEI/bibl](tei-bibl-corrected/10.1111_1467-6478.00080.xml)\n - [TEI/biblStruct](tei-biblStruct/10.1111_1467-6478.00080.biblstruct.xml)" + "text/markdown": "### Processing 10.1111_1467-6478.00080\nFiles: [TEI/bibl](tei-bibl-corrected/10.1111_1467-6478.00080.xml) | [TEI/biblStruct](tei-biblStruct/10.1111_1467-6478.00080.biblstruct.xml) | [TEI/biblStruct Gold Standard](tei-biblStruct-gold/10.1111_1467-6478.00080.xml)" }, "metadata": {}, "output_type": "display_data" @@ -132,7 +132,7 @@ { "data": { "text/plain": "<IPython.core.display.Markdown object>", - "text/markdown": "### Processing 10.1515_zfrs-1980-0103\n - [TEI/bibl](tei-bibl-corrected/10.1515_zfrs-1980-0103.xml)\n - [TEI/biblStruct](tei-biblStruct/10.1515_zfrs-1980-0103.biblstruct.xml)" + "text/markdown": "### Processing 10.1515_zfrs-1980-0103\nFiles: [TEI/bibl](tei-bibl-corrected/10.1515_zfrs-1980-0103.xml) | [TEI/biblStruct](tei-biblStruct/10.1515_zfrs-1980-0103.biblstruct.xml) | [TEI/biblStruct Gold Standard](tei-biblStruct-gold/10.1515_zfrs-1980-0103.xml)" }, "metadata": {}, "output_type": "display_data" @@ -148,7 +148,7 @@ { "data": { "text/plain": "<IPython.core.display.Markdown object>", - "text/markdown": "### Processing 10.1515_zfrs-1980-0104\n - [TEI/bibl](tei-bibl-corrected/10.1515_zfrs-1980-0104.xml)\n - [TEI/biblStruct](tei-biblStruct/10.1515_zfrs-1980-0104.biblstruct.xml)" + "text/markdown": "### Processing 10.1515_zfrs-1980-0104\nFiles: [TEI/bibl](tei-bibl-corrected/10.1515_zfrs-1980-0104.xml) | [TEI/biblStruct](tei-biblStruct/10.1515_zfrs-1980-0104.biblstruct.xml) | [TEI/biblStruct Gold Standard](tei-biblStruct-gold/10.1515_zfrs-1980-0104.xml)" }, "metadata": {}, "output_type": "display_data" @@ -259,11 +259,11 @@ " \n", " bibl_path = file_path\n", " biblstruct_path = f'{biblstruct_dir}/{file_id}.biblstruct.xml'\n", + " biblstruct_gs_path = f'{biblstruct_gold_dir}/{file_id}.xml'\n", "\n", " # log\n", " md_lines = [f'### Processing {file_id}']\n", - " md_lines.append(f' - [TEI/bibl]({Path(bibl_path).as_posix()})')\n", - " md_lines.append(f' - [TEI/biblStruct]({Path(biblstruct_path).as_posix()})')\n", + " md_lines.append(f'Files: [TEI/bibl]({Path(bibl_path).as_posix()}) | [TEI/biblStruct]({Path(biblstruct_path).as_posix()}) | [TEI/biblStruct Gold Standard]({Path(biblstruct_gs_path).as_posix()})')\n", " display(Markdown(\"\\n\".join(md_lines)))\n", "\n", " with (open(bibl_path, 'r', encoding='utf-8') as bibl_file, \n", @@ -273,7 +273,7 @@ "\n", " output_data = create_gold_standard(bibl_content, biblStruct_content)\n", " \n", - " with open(f'{biblstruct_gold_dir}/{file_id}.xml', 'w', encoding='utf-8') as output_file:\n", + " with open(biblstruct_gs_path, 'w', encoding='utf-8') as output_file:\n", " output_file.write(output_data.decode())\n", "\n", "create_all_gold_standards('tei-bibl-corrected', 'tei-biblStruct', 'tei-biblStruct-gold')\n", @@ -282,11 +282,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-09-30T09:59:48.158571300Z", - "start_time": "2024-09-30T09:59:47.826852300Z" + "end_time": "2024-09-30T16:05:40.356954800Z", + "start_time": "2024-09-30T16:05:39.907875Z" } }, "id": "b658a0ceebfc73d9" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "aa2b8d60c06d0e58" } ], "metadata": {