diff --git a/convert-anystyle-data/anystyle-to-tei.ipynb b/convert-anystyle-data/anystyle-to-tei.ipynb
index 6f041fddc716056fddbd9411b6220bc20fe225d4..528e4e1a0ef4a7b117a4c4333a5f920ba8a4da4b 100644
--- a/convert-anystyle-data/anystyle-to-tei.ipynb
+++ b/convert-anystyle-data/anystyle-to-tei.ipynb
@@ -16,6 +16,10 @@
     "\n",
     "We use `<bibl>` here instead of `<biblStruct>` because it is more loosely-structured and allows for a more flat datastructure. \n",
     "\n",
+    "Todo:\n",
+    "- BiblStruct mit der übergeordneten `<listBibl n=\"fußnote\" src=\"Input\">`\n",
+    "\n",
+    "\n",
     "## Collect metadata on TEI `<bibl>` tags"
    ],
    "metadata": {
@@ -753,13 +757,208 @@
    ],
    "id": "4c19609699dc79c"
   },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "## Create `biblStruct` that works as a Gold Standard\n",
+    "\n",
+    "The issue here is that we need to have a way to parse individual footnotes and have a reliable way of retrieving all contained references. This is not possible with a simple `bibl` to `biblStruct` conversion.\n",
+    "\n",
+    "Target TEI schema: \n",
+    "```xml\n",
+    "<TEI>\n",
+    "    <teiHeader />\n",
+    "    <standOff>\n",
+    "        <!-- each contained footnote as listBibl -->\n",
+    "        <listBibl n=\"footnote number\" src=\"full footnote as input string\">\n",
+    "            <!-- each contained reference as biblStruct, including empty ones, e.g. when there are only internal references such as \"Op. cit, p. 23\" or \"see Doe (n.5), p. 2\" -->\n",
+    "            <biblStruct />\n",
+    "            <biblStruct />\n",
+    "        </listBibl>\n",
+    "        <!--  in addition to footnotes containing refs, there might be a full bibliography -->\n",
+    "        <!--  in this case, each reference string is contained in a single <listBibl><biblStruct/></listBibl> -->\n",
+    "        <listBibl src=\"full bibliography entry as input string\">\n",
+    "            <biblStruct />\n",
+    "        </listBibl>\n",
+    "    </standOff>\n",
+    "</TEI>\n",
+    "```"
+   ],
+   "id": "3b4192e5e772efda"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-09-28T10:43:29.407449Z",
+     "start_time": "2024-09-28T10:43:29.365660Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from lxml import etree\n",
+    "import os\n",
+    "from glob import glob\n",
+    "\n",
+    "def remove_encoding_declaration(xml_string):\n",
+    "    \"\"\"Return ``xml_string`` without its leading XML declaration.\n",
+    "\n",
+    "    The result is handed to ``etree.fromstring`` as ``str``; lxml refuses ``str``\n",
+    "    input that still carries an encoding declaration. Unlike a literal\n",
+    "    replace, the pattern also covers single quotes, other encodings, extra\n",
+    "    pseudo-attributes and a leading BOM or whitespace.\n",
+    "\n",
+    "    :param xml_string: XML document as ``str``\n",
+    "    :return: the document with the ``<?xml ...?>`` declaration removed\n",
+    "    \"\"\"\n",
+    "    import re  # local import so the function does not depend on another cell\n",
+    "    # `\\s` after `xml` keeps processing instructions such as <?xml-stylesheet ?> intact\n",
+    "    return re.sub(r'^\\ufeff?\\s*<\\?xml\\s[^>]*\\?>', '', xml_string, count=1)\n",
+    "\n",
+    "def create_gold_standard(ref_content, bibl_content, biblstruct_content):\n",
+    "    \"\"\"Build the gold-standard TEI document for one input file.\n",
+    "\n",
+    "    Original code written by GPT-4, adapted by CB.\n",
+    "\n",
+    "    Every non-blank line of ``ref_content`` yields exactly one ``<listBibl>``\n",
+    "    inside ``<standOff>`` with the raw line in its ``source`` attribute. Lines\n",
+    "    whose first token is a number are treated as footnotes: they are paired, in\n",
+    "    document order, with the ``<note>`` elements of the bibl document, and for\n",
+    "    every ``<bibl>`` child of that note a ``<biblStruct>`` is moved from the\n",
+    "    biblStruct document into the ``<listBibl>``.\n",
+    "\n",
+    "    :param ref_content: raw reference strings, one entry per line\n",
+    "    :param bibl_content: XML string containing ``<note n=\"...\">`` / ``<bibl>`` elements\n",
+    "    :param biblstruct_content: XML string containing the ``<biblStruct>`` elements\n",
+    "    :return: the serialized output tree as ``bytes``\n",
+    "    :raises RuntimeError: if a line cannot be aligned with the bibl document\n",
+    "    \"\"\"\n",
+    "    bibl_tree = etree.fromstring(remove_encoding_declaration(bibl_content))\n",
+    "    bibl_struct_tree = etree.fromstring(remove_encoding_declaration(biblstruct_content))\n",
+    "\n",
+    "    tei_namespace = \"http://www.tei-c.org/ns/1.0\"\n",
+    "    ns = {\"tei\": tei_namespace }\n",
+    "    # NOTE(review): 'xmlns' is set as a plain attribute here, not through lxml's nsmap;\n",
+    "    # confirm that the serialized namespace is what downstream tools expect.\n",
+    "    output_tree = etree.Element(\"TEI\", {'xmlns':tei_namespace})\n",
+    "    etree.SubElement(output_tree, \"teiHeader\")  # TODO: take from biblStruct_tree\n",
+    "    standoff = etree.SubElement(output_tree, \"standOff\")\n",
+    "\n",
+    "    # local-name() finds the elements whether or not the biblStruct document\n",
+    "    # declares the TEI namespace\n",
+    "    biblStructs = bibl_struct_tree.xpath('//*[local-name()=\"biblStruct\"]')\n",
+    "    biblStructs_idx = 0\n",
+    "\n",
+    "    # footnotes; consumed one by one, independently of the line index, so that\n",
+    "    # blank lines and bibliography entries do not shift the alignment\n",
+    "    bibl_notes = bibl_tree.xpath('//tei:note', namespaces=ns)\n",
+    "    bibl_notes_idx = 0\n",
+    "\n",
+    "    # bibliography entries\n",
+    "    src_list_bibl = bibl_tree.xpath('//tei:listBibl', namespaces=ns)\n",
+    "    src_list_bibl_idx = 0\n",
+    "\n",
+    "    # for mixed content (having both footnotes and bibliographies), we need to know what we're currently dealing with\n",
+    "    footnote_flag = False\n",
+    "\n",
+    "    # iterate over input; splitlines() also copes with Windows line endings\n",
+    "    for raw_line in ref_content.splitlines():\n",
+    "        ref_line = raw_line.strip()\n",
+    "        if ref_line == \"\":\n",
+    "            # whitespace-only lines carry no reference\n",
+    "            continue\n",
+    "        print (f' - Analyzing \"{ref_line[:20]}\"')\n",
+    "\n",
+    "        ref_no = ref_line.split()[0]\n",
+    "        if not ref_no.isdigit():\n",
+    "            # assume its part of a bibliography or out-of-band citation not in a footnote\n",
+    "            etree.SubElement(standoff, \"listBibl\", {'source': ref_line})\n",
+    "            # NOTE(review): this branch only validates that a source listBibl/biblStruct\n",
+    "            # exists; nothing is copied into the target listBibl yet and the index can\n",
+    "            # never leave 0 - looks unfinished, confirm the intended behaviour.\n",
+    "            if footnote_flag and src_list_bibl_idx > 0:\n",
+    "                src_list_bibl_idx += 1\n",
+    "            if len(src_list_bibl) < src_list_bibl_idx + 1 or len(src_list_bibl[src_list_bibl_idx]) == 0:\n",
+    "                raise RuntimeError(\"No corresponding listBibl can be found.\")\n",
+    "            src_bibl_structs = src_list_bibl[src_list_bibl_idx].xpath('./tei:biblStruct', namespaces=ns)\n",
+    "            if len(src_bibl_structs) == 0:\n",
+    "                raise RuntimeError(\"No corresponding listBibl/biblStruct can be found.\")\n",
+    "\n",
+    "        else:\n",
+    "            # assume that line starting with a number are footnotes\n",
+    "            footnote_flag = True\n",
+    "            if bibl_notes_idx >= len(bibl_notes):\n",
+    "                raise RuntimeError(f\"No corresponding note can be found for footnote: {ref_no}\")\n",
+    "            note = bibl_notes[bibl_notes_idx]\n",
+    "            bibl_notes_idx += 1\n",
+    "            n = note.get(\"n\")\n",
+    "            print(f'  - Found <note n=\"{n}\">')\n",
+    "            # abort if lines don't match since it's an error in the source gold standard\n",
+    "            if ref_no != n:\n",
+    "                raise RuntimeError(f\"Mismatch error at note: {n}\")\n",
+    "            # one listBibl per footnote, carrying the footnote number and the current line\n",
+    "            list_bibl = etree.SubElement(standoff, \"listBibl\", {\"n\": n, \"source\": ref_line})\n",
+    "            # Iterate over bibl elements to retrieve the corresponding biblStruct elements\n",
+    "            for bibl in note.iterchildren(f\"{{{tei_namespace}}}bibl\"):\n",
+    "                title = bibl.xpath('.//tei:title/text()', namespaces=ns)\n",
+    "                if len(title) > 0:\n",
+    "                    print(f'     - Reference: {title}')\n",
+    "                else:\n",
+    "                    print(f'     - Reference with no title element')\n",
+    "                # Matching & appending biblStruct: look ahead from the current position,\n",
+    "                # but only advance the position when a match was actually found, so that\n",
+    "                # one unmatched title does not use up all remaining biblStructs\n",
+    "                for candidate_idx in range(biblStructs_idx, len(biblStructs)):\n",
+    "                    candidate = biblStructs[candidate_idx]\n",
+    "                    candidate_titles = candidate.xpath('.//*[local-name()=\"title\"]/text()')\n",
+    "                    if not title or (candidate_titles and candidate_titles[0] == title[0]):\n",
+    "                        list_bibl.append(candidate)\n",
+    "                        biblStructs_idx = candidate_idx + 1\n",
+    "                        break\n",
+    "                else:\n",
+    "                    print(f'     - WARNING: no matching biblStruct found')\n",
+    "\n",
+    "    return etree.tostring(output_tree, pretty_print=True)\n",
+    "\n",
+    "\n",
+    "def create_all_gold_standards(input_dir, bibl_dir, biblstruct_dir, biblstruct_gold_dir):\n",
+    "    \"\"\"Create a gold-standard file for every ``*.txt`` file in ``input_dir``.\n",
+    "\n",
+    "    Written by GPT-4, adapted by CB.\n",
+    "\n",
+    "    For ``<input_dir>/<id>.txt`` the contents of ``<bibl_dir>/<id>.xml`` and\n",
+    "    ``<biblstruct_dir>/<id>.biblstruct.xml`` are read, passed to\n",
+    "    ``create_gold_standard`` together with the text file, and the result is\n",
+    "    written to ``<biblstruct_gold_dir>/<id>.xml``.\n",
+    "\n",
+    "    :param input_dir: directory with the raw reference ``.txt`` files\n",
+    "    :param bibl_dir: directory with the ``<id>.xml`` bibl files\n",
+    "    :param biblstruct_dir: directory with the ``<id>.biblstruct.xml`` files\n",
+    "    :param biblstruct_gold_dir: output directory, created if it does not exist\n",
+    "    \"\"\"\n",
+    "    def read_text(path):\n",
+    "        # read explicitly as UTF-8 instead of the platform default encoding;\n",
+    "        # the output below is written as UTF-8 as well\n",
+    "        with open(path, 'r', encoding='utf-8') as text_file:\n",
+    "            return text_file.read()\n",
+    "\n",
+    "    os.makedirs(biblstruct_gold_dir, exist_ok=True)\n",
+    "    # sorted() makes the processing order (and the log) reproducible\n",
+    "    for file_path in sorted(glob(f'{input_dir}/*.txt')):\n",
+    "        # splitext removes only the trailing extension, not every '.txt' in the name\n",
+    "        file_id = os.path.splitext(os.path.basename(file_path))[0]\n",
+    "        print(f'Processing {file_id}')\n",
+    "        bibl_path = f'{bibl_dir}/{file_id}.xml'\n",
+    "        biblstruct_path = f'{biblstruct_dir}/{file_id}.biblstruct.xml'\n",
+    "\n",
+    "        ref_content = read_text(file_path)\n",
+    "        bibl_content = read_text(bibl_path)\n",
+    "        biblStruct_content = read_text(biblstruct_path)\n",
+    "\n",
+    "        output_data = create_gold_standard(ref_content, bibl_content, biblStruct_content)\n",
+    "        with open(f'{biblstruct_gold_dir}/{file_id}.xml', 'w', encoding='utf-8') as output_file:\n",
+    "            output_file.write(output_data.decode())\n",
+    "\n",
+    "# build the gold-standard files for all reference files in ./refs\n",
+    "create_all_gold_standards(\n",
+    "    input_dir='refs',\n",
+    "    bibl_dir='tei-bibl',\n",
+    "    biblstruct_dir='tei-biblStruct',\n",
+    "    biblstruct_gold_dir='tei-biblStruct-gold',\n",
+    ")\n",
+    "\n"
+   ],
+   "id": "ec1ac88441d6b9e5",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing 10.1515_zfrs-1980-0103\n",
+      " - Analyzing footnote 1\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 2\n",
+      "     - Reference with no title element\n",
+      "     - Reference: ['Nichtkriminalisierung als Struktur und Routine']\n",
+      " - Analyzing footnote 3\n",
+      "     - Reference: ['Bereitschaft zur Anzeigeerstattung']\n",
+      "     - Reference: ['Private Verbrechenskontrolle â€” eine empirische Untersuchung zur Anzeigeerstattung']\n",
+      " - Analyzing footnote 4\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 5\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 6\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 7\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 8\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 9\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 10\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 11\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 12\n",
+      "     - Reference with no title element\n",
+      " - Analyzing footnote 13\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "Mismatch error at note: 13",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mRuntimeError\u001B[0m                              Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[18], line 74\u001B[0m\n\u001B[1;32m     71\u001B[0m         \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mbiblstruct_gold_dir\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m/\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfile_id\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.xml\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mw\u001B[39m\u001B[38;5;124m'\u001B[39m, encoding\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mutf-8\u001B[39m\u001B[38;5;124m'\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m output_file:\n\u001B[1;32m     72\u001B[0m             output_file\u001B[38;5;241m.\u001B[39mwrite(output_data\u001B[38;5;241m.\u001B[39mdecode())\n\u001B[0;32m---> 74\u001B[0m create_all_gold_standards(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mrefs\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtei-bibl\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtei-biblStruct\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtei-biblStruct-gold\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
+      "Cell \u001B[0;32mIn[18], line 70\u001B[0m, in \u001B[0;36mcreate_all_gold_standards\u001B[0;34m(input_dir, bibl_dir, biblstruct_dir, biblstruct_gold_dir)\u001B[0m\n\u001B[1;32m     67\u001B[0m     bibl_content \u001B[38;5;241m=\u001B[39m bibl_file\u001B[38;5;241m.\u001B[39mread()\n\u001B[1;32m     68\u001B[0m     biblStruct_content \u001B[38;5;241m=\u001B[39m biblStruct_file\u001B[38;5;241m.\u001B[39mread()\n\u001B[0;32m---> 70\u001B[0m output_data \u001B[38;5;241m=\u001B[39m create_gold_standard(ref_content, bibl_content, biblStruct_content)\n\u001B[1;32m     71\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mbiblstruct_gold_dir\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m/\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfile_id\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.xml\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mw\u001B[39m\u001B[38;5;124m'\u001B[39m, encoding\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mutf-8\u001B[39m\u001B[38;5;124m'\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m output_file:\n\u001B[1;32m     72\u001B[0m     output_file\u001B[38;5;241m.\u001B[39mwrite(output_data\u001B[38;5;241m.\u001B[39mdecode())\n",
+      "Cell \u001B[0;32mIn[18], line 52\u001B[0m, in \u001B[0;36mcreate_gold_standard\u001B[0;34m(ref_content, bibl_content, biblstruct_content)\u001B[0m\n\u001B[1;32m     50\u001B[0m                 biblStructs_idx \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m     51\u001B[0m     \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 52\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mMismatch error at note: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mn\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m     54\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m etree\u001B[38;5;241m.\u001B[39mtostring(output_tree, pretty_print\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n",
+      "\u001B[0;31mRuntimeError\u001B[0m: Mismatch error at note: 13"
+     ]
+    }
+   ],
+   "execution_count": 18
+  },
   {
    "metadata": {},
    "cell_type": "code",
    "outputs": [],
    "execution_count": null,
-   "source": [],
-   "id": "1a8a57560f1f4868"
+   "source": "",
+   "id": "90477a6402855f3"
   }
  ],
  "metadata": {