From 771ba5ae00d0ed641598ab8cb928187f75aff187 Mon Sep 17 00:00:00 2001
From: Christian Boulanger <boulanger@lhlt.mpg.de>
Date: Wed, 31 Jul 2024 21:45:59 +0200
Subject: [PATCH] Add XSLT code

---
 convert-anystyle-data/anystyle-to-tei.ipynb | 83 ++++++++++++++++++++-
 convert-anystyle-data/lib/.gitignore        |  1 +
 2 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 convert-anystyle-data/lib/.gitignore

diff --git a/convert-anystyle-data/anystyle-to-tei.ipynb b/convert-anystyle-data/anystyle-to-tei.ipynb
index f22848a..395c904 100644
--- a/convert-anystyle-data/anystyle-to-tei.ipynb
+++ b/convert-anystyle-data/anystyle-to-tei.ipynb
@@ -148,7 +148,9 @@
   {
    "metadata": {},
    "cell_type": "markdown",
-   "source": "## Convert Groundd Truth to TEI",
+   "source": [
+    "## Convert Ground Truth to TEI"
+   ],
    "id": "aaf43ee43bb6d4d"
   },
   {
@@ -404,19 +406,94 @@
    ],
    "execution_count": 80
   },
+  {
+   "cell_type": "markdown",
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "8c8b2d820086d461"
+  },
   {
    "metadata": {},
    "cell_type": "markdown",
    "source": [
-    "## Create LinkML schema from TEI XSD"
+    "## Extract bibliographic data from TEI files"
    ],
    "id": "b0a231dc7bdd8b01"
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Download XSLTs"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "149588c08747c4b3"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "outputs": [],
+   "source": [
+    "import requests, zipfile, io, os\n",
+    "\n",
+    "if not os.path.isdir('lib/convert'): \n",
+    "    url = 'https://github.com/OpenArabicPE/convert_tei-to-bibliographic-data/archive/refs/heads/master.zip'\n",
+    "    r = requests.get(url)\n",
+    "    assert r.status_code == 200\n",
+    "    z = zipfile.ZipFile(io.BytesIO(r.content))\n",
+    "    z.extractall('lib')\n",
+    "    z.close()\n",
+    "    os.rename('lib/convert_tei-to-bibliographic-data-master', 'lib/convert')\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-07-31T19:42:48.723119300Z",
+     "start_time": "2024-07-31T19:42:47.234795500Z"
+    }
+   },
+   "id": "1f15b3af6aab73ed"
+  },
   {
    "metadata": {},
    "cell_type": "markdown",
-   "source": "",
+   "source": [
+    "### Apply XSLT"
+   ],
    "id": "aa86435960e61937"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from lxml import etree\n",
+    "\n",
+    "def apply_xslt_to_xml(xslt_path, xml_path):\n",
+    "    xslt = etree.parse(xslt_path)\n",
+    "    xml = etree.parse(xml_path)\n",
+    "    transformer = etree.XSLT(xslt)\n",
+    "    new_xml = transformer(xml)\n",
+    "    return str(new_xml)\n",
+    "\n",
+    "new_xml_str = apply_xslt_to_xml('path_to_your_xslt_file', 'path_to_your_xml_file')\n",
+    "print(new_xml_str)\n"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "cb3b4140ab153c08"
+  },
+  {
+   "cell_type": "markdown",
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "387b5b9792505b13"
   }
  ],
  "metadata": {
diff --git a/convert-anystyle-data/lib/.gitignore b/convert-anystyle-data/lib/.gitignore
new file mode 100644
index 0000000..f59ec20
--- /dev/null
+++ b/convert-anystyle-data/lib/.gitignore
@@ -0,0 +1 @@
+*
\ No newline at end of file
-- 
GitLab