From 472e5051554792dd95406f8a35dd81a428ad1d05 Mon Sep 17 00:00:00 2001
From: cboulanger <info@bibliograph.org>
Date: Mon, 29 Jul 2024 10:04:07 +0200
Subject: [PATCH] Add TEI tags documentation

---
 convert-anystyle-data/anystyle-to-tei.ipynb   | 105 ++++++++++--------
 .../schema/tei-tags-documentation.json        |   8 ++
 2 files changed, 65 insertions(+), 48 deletions(-)

diff --git a/convert-anystyle-data/anystyle-to-tei.ipynb b/convert-anystyle-data/anystyle-to-tei.ipynb
index 02bf772..3c0758f 100644
--- a/convert-anystyle-data/anystyle-to-tei.ipynb
+++ b/convert-anystyle-data/anystyle-to-tei.ipynb
@@ -83,8 +83,7 @@
     "    bibl_schema = schema.find(\"tei:bibl\")\n",
     "    data_list = []\n",
     "    #names = [child_element.local_name for child_element in bibl_schema.iterchildren()]\n",
-    "    names = ['author', 'citedRange', 'date', 'edition', 'editor', 'location', 'note', 'publisher', 'pubPlace', 'series',\n",
-    "             'span', 'title', 'volume', 'issue']\n",
+    "    names = ['author', 'biblScope', 'citedRange', 'date', 'edition', 'editor', 'location', 'note', 'publisher', 'pubPlace', 'series', 'span', 'title', 'volume', 'issue']\n",
     "    for name in tqdm(names, desc=\"Processing TEI tags\"):\n",
     "        doc_node = root.find(f\".//xs:element[@name='{name}']/xs:annotation/xs:documentation\", namespaces=namespaces)\n",
     "        if doc_node is not None:\n",
@@ -98,7 +97,7 @@
     "    return pd.DataFrame(data_list)\n",
     "\n",
     "\n",
-    "cache_file = \"schema/tei-tags-documentation.json\"\n",
+    "cache_file = \"schema/tei/tei-tags-documentation.json\"\n",
     "if not os.path.isfile(cache_file):\n",
     "    df = generate_tag_docs(\"schema/tei/tei_all.xsd\")\n",
     "    df.to_json(cache_file, index=False, orient='records')\n",
@@ -109,8 +108,8 @@
    "metadata": {
     "collapsed": false,
     "ExecuteTime": {
-     "end_time": "2024-07-29T07:56:06.664531Z",
-     "start_time": "2024-07-29T07:55:52.208162Z"
+     "end_time": "2024-07-29T08:03:30.899758Z",
+     "start_time": "2024-07-29T08:03:16.672095Z"
     }
    },
    "id": "572f566fc9784238",
@@ -118,12 +117,12 @@
     {
      "data": {
       "text/plain": [
-       "Processing TEI tags:   0%|          | 0/14 [00:00<?, ?it/s]"
+       "Processing TEI tags:   0%|          | 0/15 [00:00<?, ?it/s]"
       ],
       "application/vnd.jupyter.widget-view+json": {
        "version_major": 2,
        "version_minor": 0,
-       "model_id": "517f81d06e204232823c4b049be3de46"
+       "model_id": "2b29eb53bd824a9e8ee692483a17d0da"
       }
      },
      "metadata": {},
@@ -134,45 +133,48 @@
       "text/plain": [
        "          name                                        description  \\\n",
        "0       author  (author) in a bibliographic reference, contain...   \n",
-       "1   citedRange  (cited range) defines the range of cited conte...   \n",
-       "2         date             (date) contains a date in any format.    \n",
-       "3      edition  (edition) describes the particularities of one...   \n",
-       "4       editor  contains a secondary statement of responsibili...   \n",
-       "5     location  (location) defines the location of a place as ...   \n",
-       "6         note             (note) contains a note or annotation.    \n",
-       "7    publisher  (publisher) provides the name of the organizat...   \n",
-       "8     pubPlace  (publication place) contains the name of the p...   \n",
-       "9       series  (series information) contains information abou...   \n",
-       "10        span  associates an interpretative annotation direct...   \n",
-       "11       title    (title) contains a title for any kind of work.    \n",
+       "1    biblScope  (scope of bibliographic reference) defines the...   \n",
+       "2   citedRange  (cited range) defines the range of cited conte...   \n",
+       "3         date             (date) contains a date in any format.    \n",
+       "4      edition  (edition) describes the particularities of one...   \n",
+       "5       editor  contains a secondary statement of responsibili...   \n",
+       "6     location  (location) defines the location of a place as ...   \n",
+       "7         note             (note) contains a note or annotation.    \n",
+       "8    publisher  (publisher) provides the name of the organizat...   \n",
+       "9     pubPlace  (publication place) contains the name of the p...   \n",
+       "10      series  (series information) contains information abou...   \n",
+       "11        span  associates an interpretative annotation direct...   \n",
+       "12       title    (title) contains a title for any kind of work.    \n",
        "\n",
        "                                        documentation  \\\n",
        "0   3.12.2.2. Titles, Authors, and Editors 2.2.1. ...   \n",
        "1   3.12.2.5. Scopes and Ranges in Bibliographic C...   \n",
-       "2   3.6.4. Dates and Times 2.2.4. Publication, Dis...   \n",
-       "3                        2.2.2. The Edition Statement   \n",
-       "4              3.12.2.2. Titles, Authors, and Editors   \n",
-       "5                                      14.3.4. Places   \n",
-       "6   3.9.1. Notes and Simple Annotation 2.2.6. The ...   \n",
-       "7   3.12.2.4. Imprint, Size of a Document, and Rep...   \n",
+       "2   3.12.2.5. Scopes and Ranges in Bibliographic C...   \n",
+       "3   3.6.4. Dates and Times 2.2.4. Publication, Dis...   \n",
+       "4                        2.2.2. The Edition Statement   \n",
+       "5              3.12.2.2. Titles, Authors, and Editors   \n",
+       "6                                      14.3.4. Places   \n",
+       "7   3.9.1. Notes and Simple Annotation 2.2.6. The ...   \n",
        "8   3.12.2.4. Imprint, Size of a Document, and Rep...   \n",
-       "9   3.12.2.1. Analytic, Monographic, and Series Le...   \n",
-       "10                    18.3. Spans and Interpretations   \n",
-       "11  3.12.2.2. Titles, Authors, and Editors 2.2.1. ...   \n",
+       "9   3.12.2.4. Imprint, Size of a Document, and Rep...   \n",
+       "10  3.12.2.1. Analytic, Monographic, and Series Le...   \n",
+       "11                    18.3. Spans and Interpretations   \n",
+       "12  3.12.2.2. Titles, Authors, and Editors 2.2.1. ...   \n",
        "\n",
        "                                                 urls  \n",
        "0   {'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
        "1   {'3.12.2.5': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
-       "2   {'3.6.4': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
-       "3   {'2.2.2': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
-       "4   {'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
-       "5   {'14.3.4': 'https://vault.tei-c.de/P5/3.0.0/do...  \n",
-       "6   {'3.9.1': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
-       "7   {'3.12.2.4': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
+       "2   {'3.12.2.5': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
+       "3   {'3.6.4': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
+       "4   {'2.2.2': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
+       "5   {'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
+       "6   {'14.3.4': 'https://vault.tei-c.de/P5/3.0.0/do...  \n",
+       "7   {'3.9.1': 'https://vault.tei-c.de/P5/3.0.0/doc...  \n",
        "8   {'3.12.2.4': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
-       "9   {'3.12.2.1': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
-       "10  {'18.3': 'https://vault.tei-c.de/P5/3.0.0/doc/...  \n",
-       "11  {'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...  "
+       "9   {'3.12.2.4': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
+       "10  {'3.12.2.1': 'https://vault.tei-c.de/P5/3.0.0/...  \n",
+       "11  {'18.3': 'https://vault.tei-c.de/P5/3.0.0/doc/...  \n",
+       "12  {'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...  "
       ],
       "text/html": [
        "<div>\n",
@@ -209,76 +211,83 @@
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
+       "      <td>biblScope</td>\n",
+       "      <td>(scope of bibliographic reference) defines the...</td>\n",
+       "      <td>3.12.2.5. Scopes and Ranges in Bibliographic C...</td>\n",
+       "      <td>{'3.12.2.5': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
        "      <td>citedRange</td>\n",
        "      <td>(cited range) defines the range of cited conte...</td>\n",
        "      <td>3.12.2.5. Scopes and Ranges in Bibliographic C...</td>\n",
        "      <td>{'3.12.2.5': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>3</th>\n",
        "      <td>date</td>\n",
        "      <td>(date) contains a date in any format.</td>\n",
        "      <td>3.6.4. Dates and Times 2.2.4. Publication, Dis...</td>\n",
        "      <td>{'3.6.4': 'https://vault.tei-c.de/P5/3.0.0/doc...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
+       "      <th>4</th>\n",
        "      <td>edition</td>\n",
        "      <td>(edition) describes the particularities of one...</td>\n",
        "      <td>2.2.2. The Edition Statement</td>\n",
        "      <td>{'2.2.2': 'https://vault.tei-c.de/P5/3.0.0/doc...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4</th>\n",
+       "      <th>5</th>\n",
        "      <td>editor</td>\n",
        "      <td>contains a secondary statement of responsibili...</td>\n",
        "      <td>3.12.2.2. Titles, Authors, and Editors</td>\n",
        "      <td>{'3.12.2.2': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
+       "      <th>6</th>\n",
        "      <td>location</td>\n",
        "      <td>(location) defines the location of a place as ...</td>\n",
        "      <td>14.3.4. Places</td>\n",
        "      <td>{'14.3.4': 'https://vault.tei-c.de/P5/3.0.0/do...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>6</th>\n",
+       "      <th>7</th>\n",
        "      <td>note</td>\n",
        "      <td>(note) contains a note or annotation.</td>\n",
        "      <td>3.9.1. Notes and Simple Annotation 2.2.6. The ...</td>\n",
        "      <td>{'3.9.1': 'https://vault.tei-c.de/P5/3.0.0/doc...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>7</th>\n",
+       "      <th>8</th>\n",
        "      <td>publisher</td>\n",
        "      <td>(publisher) provides the name of the organizat...</td>\n",
        "      <td>3.12.2.4. Imprint, Size of a Document, and Rep...</td>\n",
        "      <td>{'3.12.2.4': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>8</th>\n",
+       "      <th>9</th>\n",
        "      <td>pubPlace</td>\n",
        "      <td>(publication place) contains the name of the p...</td>\n",
        "      <td>3.12.2.4. Imprint, Size of a Document, and Rep...</td>\n",
        "      <td>{'3.12.2.4': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9</th>\n",
+       "      <th>10</th>\n",
        "      <td>series</td>\n",
        "      <td>(series information) contains information abou...</td>\n",
        "      <td>3.12.2.1. Analytic, Monographic, and Series Le...</td>\n",
        "      <td>{'3.12.2.1': 'https://vault.tei-c.de/P5/3.0.0/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>10</th>\n",
+       "      <th>11</th>\n",
        "      <td>span</td>\n",
        "      <td>associates an interpretative annotation direct...</td>\n",
        "      <td>18.3. Spans and Interpretations</td>\n",
        "      <td>{'18.3': 'https://vault.tei-c.de/P5/3.0.0/doc/...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>11</th>\n",
+       "      <th>12</th>\n",
        "      <td>title</td>\n",
        "      <td>(title) contains a title for any kind of work.</td>\n",
        "      <td>3.12.2.2. Titles, Authors, and Editors 2.2.1. ...</td>\n",
@@ -289,12 +298,12 @@
        "</div>"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 11
+   "execution_count": 13
   },
   {
    "metadata": {
diff --git a/convert-anystyle-data/schema/tei-tags-documentation.json b/convert-anystyle-data/schema/tei-tags-documentation.json
index a5e7a9b..96623d2 100644
--- a/convert-anystyle-data/schema/tei-tags-documentation.json
+++ b/convert-anystyle-data/schema/tei-tags-documentation.json
@@ -8,6 +8,14 @@
       "2.2.1": "https:\/\/vault.tei-c.de\/P5\/3.0.0\/doc\/tei-p5-doc\/en\/html\/HD.html#HD21"
     }
   },
+  {
+    "name": "biblScope",
+    "description": "(scope of bibliographic reference) defines the scope of a bibliographic reference, for example as a list of page numbers, or a named subdivision of a larger work. ",
+    "documentation": "3.12.2.5. Scopes and Ranges in Bibliographic Citations",
+    "urls": {
+      "3.12.2.5": "https:\/\/vault.tei-c.de\/P5\/3.0.0\/doc\/tei-p5-doc\/en\/html\/CO.html#COBICOB"
+    }
+  },
   {
     "name": "citedRange",
     "description": "(cited range) defines the range of cited content, often represented by pages or other units ",
-- 
GitLab