diff --git a/convert-anystyle-data/anystyle-to-tei.ipynb b/convert-anystyle-data/anystyle-to-tei.ipynb index b9994e23d568e2548be089a52beb299dfdb0a960..6b5af98331283051d7fafb30734f9a9f79e0ef91 100644 --- a/convert-anystyle-data/anystyle-to-tei.ipynb +++ b/convert-anystyle-data/anystyle-to-tei.ipynb @@ -431,20 +431,27 @@ "import glob\n", "from urllib.request import urlopen\n", "import requests\n", - "import traceback\n", + "\n", + "if not 'cache' in locals():\n", + " cache = {}\n", "\n", "class HttpsResolver(etree.Resolver):\n", - " def resolve(self, url, id, context): \n", - " r = requests.get(url)\n", - " assert(r.status_code == 200)\n", - " return self.resolve_string(r.content, context, base_url=url)\n", + " def resolve(self, url, id, context):\n", + " if url in cache:\n", + " xml_str = cache[url]\n", + " else:\n", + " r = requests.get(url)\n", + " assert(r.status_code == 200)\n", + " xml_str = cache[url] = r.content\n", + " return self.resolve_string(xml_str, context, base_url=url)\n", + "\n", + "xml_parser = etree.XMLParser(no_network=False)\n", + "xml_parser.resolvers.add(HttpsResolver())\n", "\n", "def apply_xslt(xslt_path, xml_input_path, xml_output_path):\n", " try:\n", " if xslt_path.startswith('http'):\n", " with urlopen(xslt_path) as f:\n", - " xml_parser = etree.XMLParser(no_network=False)\n", - " xml_parser.resolvers.add(HttpsResolver())\n", " xslt_doc = etree.parse(f, parser=xml_parser)\n", " else:\n", " xslt_doc = etree.parse(xslt_path)\n", @@ -467,8 +474,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-08-03T20:28:20.087934Z", - "start_time": "2024-08-03T20:28:18.699591Z" + "end_time": "2024-08-03T20:47:26.220279Z", + "start_time": "2024-08-03T20:47:25.805461Z" } }, "id": "cb3b4140ab153c08", @@ -488,7 +495,7 @@ ] } ], - "execution_count": 28 + "execution_count": 33 }, { "metadata": {