From c88e8bcb3514da8ca0de03b948067da6ae45fd4d Mon Sep 17 00:00:00 2001 From: Christian Boulanger <boulanger@lhlt.mpg.de> Date: Mon, 11 Mar 2024 18:40:11 +0100 Subject: [PATCH] Fixes to wikidata experiment --- wikidata/lib/dnb_data.py | 30 +++--- wikidata/query-wikidata.ipynb | 176 +++++++++++++++++++++++----------- 2 files changed, 132 insertions(+), 74 deletions(-) diff --git a/wikidata/lib/dnb_data.py b/wikidata/lib/dnb_data.py index 753d204..9aed61b 100644 --- a/wikidata/lib/dnb_data.py +++ b/wikidata/lib/dnb_data.py @@ -2,9 +2,9 @@ import requests from bs4 import BeautifulSoup -import csv import urllib.parse from lxml import etree +import pandas as pd def generate_query_string(person, startRecord): base_url = "https://services.dnb.de/sru/dnb" @@ -63,14 +63,13 @@ def parse_records(xml_data): return results, num_records -def query_to_csv(person, file_path): +def get_publications(person): start_record = 1 - num_records = 0 retrieved_records = 0 + data = [] # Initialize an empty list to store the data - with open(file_path, 'w', newline='', encoding='utf-8') as csvfile: - csvwriter = csv.writer(csvfile) - csvwriter.writerow(['Title', 'Author', 'Publication Year']) + # The structure of the DataFrame columns + columns = ['Title', 'Author', 'Publication Year'] while True: xml_data = fetch_data(person, start_record) @@ -79,21 +78,16 @@ def query_to_csv(person, file_path): if not results: break - with open(file_path, 'a', newline='', encoding='utf-8') as csvfile: - csvwriter = csv.writer(csvfile) - for row in results: - csvwriter.writerow(row) + data.extend(results) + retrieved_records += len(results) start_record += len(results) if retrieved_records >= num_records: break -def load_data_from_csv(file_path): - data = [] - with open(file_path, 'r', newline='', encoding='utf-8') as csvfile: - csvreader = csv.reader(csvfile) - next(csvreader) # Skip the header row - for row in csvreader: - data.append(row) - return data + # Convert the list of data to a pandas DataFrame + df = pd.DataFrame(data, columns=columns) + return df + + diff --git a/wikidata/query-wikidata.ipynb b/wikidata/query-wikidata.ipynb index af3bcd9..2f5752f 100644 --- a/wikidata/query-wikidata.ipynb +++ b/wikidata/query-wikidata.ipynb @@ -2,70 +2,77 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2024-03-10T21:43:47.207010900Z", - "start_time": "2024-03-10T21:43:47.199002300Z" + "end_time": "2024-03-11T17:38:32.545716600Z", + "start_time": "2024-03-11T17:38:32.539202500Z" } }, "outputs": [], "source": [ "import os.path\n", - "import textwrap\n", "import requests\n", "import pandas as pd\n", + "import textwrap\n", "\n", - "def generate_sparql_query(fullName, property_labels_to_ids, language='en'):\n", + "def generate_sparql_query(fullName, property_labels_to_ids, language='en', qid=None):\n", " \"\"\"\n", - " Query WikiData for the properties of the given person listed in the given property map.\n", - " All properties that are simple values without a label must have an \"_id\" suffix, all date\n", - " properties must begin with \"date\"\n", - " :param fullName: \n", - " :param property_labels_to_ids: \n", - " :param language: \n", - " :return: \n", + " Query WikiData for the properties of the given person listed in the given property map,\n", + " either by fullName or QID. When a QID is provided, ?itemLabel is not included in the query.\n", + " :param fullName: Name of the person to query\n", + " :param property_labels_to_ids: Dictionary mapping property labels to WikiData property IDs\n", + " :param language: Language code for the query results\n", + " :param qid: WikiData entity ID (QID) for the person\n", + " :return: SPARQL query string\n", " \"\"\"\n", " propSelection = \"\"\n", " for label, pid in property_labels_to_ids.items():\n", - " if label.endswith(\"_id\") or label.startswith(\"image\"):\n", - " # literal values, including URIs - this needs to be solved in a more generic way\n", - " propSelection += f\"\"\"\n", - " OPTIONAL {{ ?item wdt:{pid} ?{label}. }}\"\"\"\n", - " elif label.startswith(\"date\"): \n", - " # Dates, fetched directly but need special handling for formatting if desired\n", + " if label.endswith(\"_id\") or label.startswith(\"image\") or label.startswith(\"date\"):\n", " propSelection += f\"\"\"\n", " OPTIONAL {{ ?item wdt:{pid} ?{label}. }}\"\"\"\n", " else:\n", " propSelection += f\"\"\"\n", - " OPTIONAL {{ ?item wdt:{pid} ?{label}Id .\n", - " ?{label}Id rdfs:label ?{label} FILTER(LANG(?{label}) = \"{language}\") .\n", - " SERVICE wikibase:label {{ bd:serviceParam wikibase:language \"{language}\". }} }}\"\"\"\n", + " OPTIONAL {{ ?item wdt:{pid} ?{label}Id . ?{label}Id rdfs:label ?{label} FILTER(LANG(?{label}) = \"{language}\") . }}\"\"\"\n", "\n", + " if qid:\n", + " selectClause = \"SELECT DISTINCT ?item\"\n", + " itemConstraint = f\"BIND(wd:{qid} AS ?item).\"\n", + " groupByClause = \"GROUP BY ?item\"\n", + " else:\n", + " selectClause = \"SELECT DISTINCT ?item ?itemLabel\"\n", + " selectClause += \"\\n \".join([f\"(SAMPLE(?{label}) AS ?{label})\" for label in property_labels_to_ids.keys()])\n", + " itemConstraint = f'?item wdt:P31 wd:Q5; rdfs:label \"{fullName}\"@{language} .'\n", + " groupByClause = \"GROUP BY ?item ?itemLabel\"\n", + " \n", " query = textwrap.dedent(f\"\"\"\n", - " SELECT DISTINCT ?item ?itemLabel {\"\".join([f\"(SAMPLE(?{label}) AS ?{label})\" for label in property_labels_to_ids])}\n", + " {selectClause} \n", " WHERE {{\n", - " ?item wdt:P31 wd:Q5; rdfs:label \"{fullName}\"@{language}.\n", - " {textwrap.dedent(propSelection)}\n", + " {itemConstraint}\n", + " {propSelection}\n", " }}\n", - " GROUP BY ?item ?itemLabel \n", + " {groupByClause}\n", " \"\"\")\n", + "\n", " return query\n", "\n", + "\n", + "\n", "def construct_image_url(filename):\n", " return f\"https://commons.wikimedia.org/wiki/Special:FilePath/{requests.utils.quote(filename)}\"\n", "\n", "\n", "\n", - "def query_wikidata(fullName, property_map, language='en'):\n", + "def query_wikidata(fullName, property_map, language='en', qid=None):\n", " SPARQL_ENDPOINT = \"https://query.wikidata.org/sparql\"\n", - " query = generate_sparql_query(fullName, property_map, language)\n", + " query = generate_sparql_query(fullName, property_map, language, qid=qid)\n", " headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}\n", " response = requests.get(SPARQL_ENDPOINT, headers=headers, params={'query': query, 'format': 'json'})\n", "\n", " if response.status_code != 200:\n", + " print(query)\n", " response.raise_for_status()\n", " \n", " results = response.json()['results']['bindings']\n", @@ -129,35 +136,80 @@ " return links\n", "\n", "\n", - "\n", - "def get_person_info_from_wikidata(names, property_map, language='en'):\n", + "def get_person_info_from_wikidata(names, property_map, languages=None):\n", + " if languages is None:\n", + " languages = ['en', 'de']\n", " all_data = []\n", - " for fullName in names:\n", - " results = query_wikidata(fullName, property_map, language)\n", + " print('Retrieving scholar data...')\n", + " for item in names:\n", + " if type(item) is tuple:\n", + " results = query_wikidata(item[0], property_map, languages[0], qid=item[1])\n", + " else:\n", + " results = query_wikidata(item, property_map, languages[0])\n", + " \n", " all_data += results\n", " if len(all_data) > 0:\n", " # Ensure fullName appears first by reordering columns based on property_labels_to_ids keys\n", " columns_order = ['fullName', 'qid'] + list(property_map.keys())\n", " df = pd.DataFrame(all_data, columns=columns_order)\n", - " # add wikipedia links\n", - " df[f'wikipedia_{language}'] = df.apply(lambda r: get_wikipedia_links(r['qid'], [language]).get(language), axis=1 )\n", + " \n", + " # Add wikipedia links\n", + " print(\"Retrieving wikipedia URLs\")\n", + " for language in languages:\n", + " df[f'wikipedia_{language}'] = df.apply(lambda r: get_wikipedia_links(r['qid'], language).get(language), axis=1)\n", " else:\n", - " df = pd.DataFrame(columns=['fullName'] + list(property_map.keys()) + [f'wikipedia_{language}']) \n", + " df = pd.DataFrame(columns=['fullName'] + list(property_map.keys()) + [f'wikipedia_{language}'])\n", " return df" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "outputs": [ { - "data": { - "text/plain": " fullName qid sexOrGender \\\n0 Hans Kelsen Q84165 male \n1 Hugo Sinzheimer Q86043 male \n2 Karl Renner Q48010797 male \n3 Karl Renner Q15451414 male \n4 Karl Renner Q94817885 male \n5 Karl Renner Q11726 male \n6 Karl Renner Q107022720 male \n7 Ernst Fraenkel Q21997821 male \n8 Ernst Fraenkel Q92522 male \n9 Ernst Fraenkel Q1358093 male \n10 Ernst Fraenkel Q86812 male \n11 Franz Leopold Neumann Q63195 male \n12 Franz Leopold Neumann Q112562068 male \n13 Otto Kahn-Freund Q121832 male \n14 Otto Kirchheimer Q214397 male \n15 Ludwig Bendix Q28053205 male \n16 Ludwig Bendix Q15449424 male \n17 Arthur Nussbaum Q103088 male \n18 Arthur Nussbaum Q94847354 male \n19 Theodor Geiger Q96410 male \n20 Erhard Blankenburg Q51595283 male \n21 Wolfgang Kaupen Q93221485 male \n22 Rüdiger Lautmann Q91074 male \n23 Thilo Ramm Q59533838 male \n24 Rudolf Wiethölter Q1512482 male \n25 Niklas Luhmann Q57238 male \n26 Niklas Luhmann Q85691627 None \n27 Niklas Luhmann Q61803657 None \n28 Gunther Teubner Q98304 male \n29 Volkmar Gessner Q15435946 male \n30 Konstanze Plett Q95192683 female \n31 Ute Sacksofsky Q48562036 female \n32 Susanne Baer Q95656754 female \n33 Susanne Baer Q101872 female \n\n image \\\n0 http://commons.wikimedia.org/wiki/Special:File... \n1 http://commons.wikimedia.org/wiki/Special:File... \n2 None \n3 http://commons.wikimedia.org/wiki/Special:File... \n4 None \n5 http://commons.wikimedia.org/wiki/Special:File... \n6 None \n7 None \n8 None \n9 None \n10 None \n11 None \n12 None \n13 http://commons.wikimedia.org/wiki/Special:File... \n14 None \n15 None \n16 None \n17 http://commons.wikimedia.org/wiki/Special:File... \n18 None \n19 None \n20 http://commons.wikimedia.org/wiki/Special:File... \n21 None \n22 http://commons.wikimedia.org/wiki/Special:File... \n23 None \n24 None \n25 http://commons.wikimedia.org/wiki/Special:File... \n26 None \n27 None \n28 http://commons.wikimedia.org/wiki/Special:File... \n29 http://commons.wikimedia.org/wiki/Special:File... \n30 None \n31 None \n32 None \n33 http://commons.wikimedia.org/wiki/Special:File... \n\n countryOfCitizenship givenName familyName dateOfBirth \\\n0 United States of America Hans Kelsen 1881-10-11T00:00:00Z \n1 Germany D. Sinzheimer 1875-01-01T00:00:00Z \n2 Austria Karl Renner 1884-12-24T00:00:00Z \n3 Germany Karl Renner 1833-05-13T00:00:00Z \n4 None Karl Renner None \n5 Austria Karl Renner 1870-12-14T00:00:00Z \n6 None Karl Renner 1889-12-01T00:00:00Z \n7 None Ernst Fraenkel 1923-01-01T00:00:00Z \n8 Germany Ernst Fraenkel 1881-10-16T00:00:00Z \n9 Germany Ernst Fraenkel 1891-04-05T00:00:00Z \n10 United States of America Ernst Fraenkel 1898-12-26T00:00:00Z \n11 Weimar Republic Franz Neumann 1900-05-23T00:00:00Z \n12 None Franz Neumann None \n13 Germany Otto None 1900-11-17T00:00:00Z \n14 United States of America Otto Kirchheimer 1905-11-11T00:00:00Z \n15 None Ludwig Bendix 1857-10-28T00:00:00Z \n16 Germany Ludwig Bendix 1877-06-28T00:00:00Z \n17 United States of America Arthur Nussbaum 1877-01-01T00:00:00Z \n18 None Arthur None 1853-10-18T00:00:00Z \n19 Denmark Theodor Geiger 1891-11-09T00:00:00Z \n20 Germany Erhard Blankenburg 1938-10-30T00:00:00Z \n21 None Wolfgang None 1936-01-01T00:00:00Z \n22 Germany Rüdiger None 1935-12-22T00:00:00Z \n23 Germany Thilo Ramm 1925-04-04T00:00:00Z \n24 Germany Rudolf None 1929-07-17T00:00:00Z \n25 Germany Niklas Luhmann 1927-12-08T00:00:00Z \n26 None None None None \n27 None None None None \n28 Germany Gunther Teubner 1944-04-30T00:00:00Z \n29 Germany Volkmar Gessner 1937-10-09T00:00:00Z \n30 None None None 1947-01-01T00:00:00Z \n31 Germany Ute None 1960-01-01T00:00:00Z \n32 None Susanne Baer None \n33 Germany Susanne Baer 1964-02-16T00:00:00Z \n\n dateOfDeath occupation fieldOfWork \\\n0 1973-04-19T00:00:00Z philosopher constitutional law \n1 1945-09-16T00:00:00Z trade unionist None \n2 1929-06-24T00:00:00Z None None \n3 1913-09-22T00:00:00Z merchant None \n4 None board member None \n5 1950-12-31T00:00:00Z diplomat politics \n6 1947-05-17T00:00:00Z local politician None \n7 2014-11-13T00:00:00Z businessperson None \n8 1957-10-02T00:00:00Z linguist linguistics \n9 1971-08-18T00:00:00Z economic historian None \n10 1975-03-28T00:00:00Z political scientist None \n11 1954-09-02T00:00:00Z sociologist None \n12 None printer pressing \n13 1979-06-16T00:00:00Z university teacher None \n14 1965-11-22T00:00:00Z university teacher None \n15 1923-09-28T00:00:00Z university teacher None \n16 1954-01-03T00:00:00Z notary None \n17 1964-01-01T00:00:00Z university teacher international law \n18 1916-05-11T00:00:00Z journalist None \n19 1952-06-16T00:00:00Z sociologist None \n20 2018-03-28T00:00:00Z sociologist sociology of law \n21 1981-01-01T00:00:00Z sociologist sociology of law \n22 None LGBTQI+ rights activist homosexuality \n23 2018-06-17T00:00:00Z legal scholar None \n24 None university teacher None \n25 1998-11-06T00:00:00Z cyberneticist sociology \n26 None researcher None \n27 None researcher None \n28 None jurist None \n29 2014-11-08T00:00:00Z sociologist sociology of law \n30 None jurist None \n31 None university teacher comparative law \n32 None actor None \n33 None judge None \n\n viaf_id isni_id gnd_id \\\n0 31998356 0000000121266076 118561219 \n1 27864307 0000000109619641 118614711 \n2 78817437 0000000054940875 134869036 \n3 171170593 None 1012296458 \n4 100408035 None 139099409 \n5 61669459 0000000121358165 118599739 \n6 None None None \n7 None None None \n8 24595835 0000000081027854 11669064X \n9 50078162 None 121259854 \n10 27108403 0000000110230959 118534602 \n11 15561879 0000000109564943 118587293 \n12 637163874508945722514 None None \n13 76317591 0000000109168959 118559362 \n14 32042801 0000000081110244 118562371 \n15 88720482 0000000061811334 1023309920 \n16 74647579 0000000081553379 118702033 \n17 5180962 0000000120988288 117071676 \n18 308715560 None 1051211875 \n19 56667946 0000000109038951 118538187 \n20 64109592 0000000110676109 115459235 \n21 32919813 0000000035495614 124045405 \n22 24732961 000000011469331X 120502208 \n23 9924244 0000000108689541 116327391 \n24 106974404 0000000116961365 1034437860 \n25 29546145 0000000122778532 118575147 \n26 None None None \n27 None None None \n28 108364502 0000000109312017 119443562 \n29 69100039 0000000109127065 170469328 \n30 20628376 0000000066847723 124957048 \n31 54395802 0000000109029166 132505746 \n32 189145911193727062107 None 1093595477 \n33 262465472 0000000030338599 113854161 \n\n wikipedia_en \n0 https://en.wikipedia.org/wiki/Hans%20Kelsen \n1 https://en.wikipedia.org/wiki/Hugo%20Sinzheimer \n2 None \n3 None \n4 None \n5 https://en.wikipedia.org/wiki/Karl%20Renner \n6 None \n7 https://en.wikipedia.org/wiki/Ernst%20Fraenkel... \n8 https://en.wikipedia.org/wiki/Ernst%20Fraenkel... \n9 None \n10 https://en.wikipedia.org/wiki/Ernst%20Fraenkel... \n11 https://en.wikipedia.org/wiki/Franz%20Neumann%... \n12 None \n13 https://en.wikipedia.org/wiki/Otto%20Kahn-Freund \n14 https://en.wikipedia.org/wiki/Otto%20Kirchheimer \n15 None \n16 None \n17 https://en.wikipedia.org/wiki/Arthur%20Nussbaum \n18 None \n19 https://en.wikipedia.org/wiki/Theodor%20Geiger \n20 https://en.wikipedia.org/wiki/Erhard%20Blanken... \n21 None \n22 https://en.wikipedia.org/wiki/R%C3%BCdiger%20L... \n23 None \n24 None \n25 https://en.wikipedia.org/wiki/Niklas%20Luhmann \n26 None \n27 None \n28 https://en.wikipedia.org/wiki/Gunther%20Teubner \n29 https://en.wikipedia.org/wiki/Volkmar%20Gessner \n30 None \n31 None \n32 None \n33 https://en.wikipedia.org/wiki/Susanne%20Baer ", - "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>fullName</th>\n <th>qid</th>\n <th>sexOrGender</th>\n <th>image</th>\n <th>countryOfCitizenship</th>\n <th>givenName</th>\n <th>familyName</th>\n <th>dateOfBirth</th>\n <th>dateOfDeath</th>\n <th>occupation</th>\n <th>fieldOfWork</th>\n <th>viaf_id</th>\n <th>isni_id</th>\n <th>gnd_id</th>\n <th>wikipedia_en</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Hans Kelsen</td>\n <td>Q84165</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>United States of America</td>\n <td>Hans</td>\n <td>Kelsen</td>\n <td>1881-10-11T00:00:00Z</td>\n <td>1973-04-19T00:00:00Z</td>\n <td>philosopher</td>\n <td>constitutional law</td>\n <td>31998356</td>\n <td>0000000121266076</td>\n <td>118561219</td>\n <td>https://en.wikipedia.org/wiki/Hans%20Kelsen</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Hugo Sinzheimer</td>\n <td>Q86043</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>D.</td>\n <td>Sinzheimer</td>\n <td>1875-01-01T00:00:00Z</td>\n <td>1945-09-16T00:00:00Z</td>\n <td>trade unionist</td>\n <td>None</td>\n <td>27864307</td>\n <td>0000000109619641</td>\n <td>118614711</td>\n <td>https://en.wikipedia.org/wiki/Hugo%20Sinzheimer</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Karl Renner</td>\n <td>Q48010797</td>\n <td>male</td>\n <td>None</td>\n <td>Austria</td>\n <td>Karl</td>\n <td>Renner</td>\n <td>1884-12-24T00:00:00Z</td>\n <td>1929-06-24T00:00:00Z</td>\n <td>None</td>\n <td>None</td>\n <td>78817437</td>\n <td>0000000054940875</td>\n <td>134869036</td>\n <td>None</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Karl Renner</td>\n <td>Q15451414</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Karl</td>\n <td>Renner</td>\n <td>1833-05-13T00:00:00Z</td>\n <td>1913-09-22T00:00:00Z</td>\n <td>merchant</td>\n <td>None</td>\n <td>171170593</td>\n <td>None</td>\n <td>1012296458</td>\n <td>None</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Karl Renner</td>\n <td>Q94817885</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Karl</td>\n <td>Renner</td>\n <td>None</td>\n <td>None</td>\n <td>board member</td>\n <td>None</td>\n <td>100408035</td>\n <td>None</td>\n <td>139099409</td>\n <td>None</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Karl Renner</td>\n <td>Q11726</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Austria</td>\n <td>Karl</td>\n <td>Renner</td>\n <td>1870-12-14T00:00:00Z</td>\n <td>1950-12-31T00:00:00Z</td>\n <td>diplomat</td>\n <td>politics</td>\n <td>61669459</td>\n <td>0000000121358165</td>\n <td>118599739</td>\n <td>https://en.wikipedia.org/wiki/Karl%20Renner</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Karl Renner</td>\n <td>Q107022720</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Karl</td>\n <td>Renner</td>\n <td>1889-12-01T00:00:00Z</td>\n <td>1947-05-17T00:00:00Z</td>\n <td>local politician</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Ernst Fraenkel</td>\n <td>Q21997821</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Ernst</td>\n <td>Fraenkel</td>\n <td>1923-01-01T00:00:00Z</td>\n <td>2014-11-13T00:00:00Z</td>\n <td>businessperson</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>https://en.wikipedia.org/wiki/Ernst%20Fraenkel...</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Ernst Fraenkel</td>\n <td>Q92522</td>\n <td>male</td>\n <td>None</td>\n <td>Germany</td>\n <td>Ernst</td>\n <td>Fraenkel</td>\n <td>1881-10-16T00:00:00Z</td>\n <td>1957-10-02T00:00:00Z</td>\n <td>linguist</td>\n <td>linguistics</td>\n <td>24595835</td>\n <td>0000000081027854</td>\n <td>11669064X</td>\n <td>https://en.wikipedia.org/wiki/Ernst%20Fraenkel...</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Ernst Fraenkel</td>\n <td>Q1358093</td>\n <td>male</td>\n <td>None</td>\n <td>Germany</td>\n <td>Ernst</td>\n <td>Fraenkel</td>\n <td>1891-04-05T00:00:00Z</td>\n <td>1971-08-18T00:00:00Z</td>\n <td>economic historian</td>\n <td>None</td>\n <td>50078162</td>\n <td>None</td>\n <td>121259854</td>\n <td>None</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Ernst Fraenkel</td>\n <td>Q86812</td>\n <td>male</td>\n <td>None</td>\n <td>United States of America</td>\n <td>Ernst</td>\n <td>Fraenkel</td>\n <td>1898-12-26T00:00:00Z</td>\n <td>1975-03-28T00:00:00Z</td>\n <td>political scientist</td>\n <td>None</td>\n <td>27108403</td>\n <td>0000000110230959</td>\n <td>118534602</td>\n <td>https://en.wikipedia.org/wiki/Ernst%20Fraenkel...</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Franz Leopold Neumann</td>\n <td>Q63195</td>\n <td>male</td>\n <td>None</td>\n <td>Weimar Republic</td>\n <td>Franz</td>\n <td>Neumann</td>\n <td>1900-05-23T00:00:00Z</td>\n <td>1954-09-02T00:00:00Z</td>\n <td>sociologist</td>\n <td>None</td>\n <td>15561879</td>\n <td>0000000109564943</td>\n <td>118587293</td>\n <td>https://en.wikipedia.org/wiki/Franz%20Neumann%...</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Franz Leopold Neumann</td>\n <td>Q112562068</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Franz</td>\n <td>Neumann</td>\n <td>None</td>\n <td>None</td>\n <td>printer</td>\n <td>pressing</td>\n <td>637163874508945722514</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Otto Kahn-Freund</td>\n <td>Q121832</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Otto</td>\n <td>None</td>\n <td>1900-11-17T00:00:00Z</td>\n <td>1979-06-16T00:00:00Z</td>\n <td>university teacher</td>\n <td>None</td>\n <td>76317591</td>\n <td>0000000109168959</td>\n <td>118559362</td>\n <td>https://en.wikipedia.org/wiki/Otto%20Kahn-Freund</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Otto Kirchheimer</td>\n <td>Q214397</td>\n <td>male</td>\n <td>None</td>\n <td>United States of America</td>\n <td>Otto</td>\n <td>Kirchheimer</td>\n <td>1905-11-11T00:00:00Z</td>\n <td>1965-11-22T00:00:00Z</td>\n <td>university teacher</td>\n <td>None</td>\n <td>32042801</td>\n <td>0000000081110244</td>\n <td>118562371</td>\n <td>https://en.wikipedia.org/wiki/Otto%20Kirchheimer</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Ludwig Bendix</td>\n <td>Q28053205</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Ludwig</td>\n <td>Bendix</td>\n <td>1857-10-28T00:00:00Z</td>\n <td>1923-09-28T00:00:00Z</td>\n <td>university teacher</td>\n <td>None</td>\n <td>88720482</td>\n <td>0000000061811334</td>\n <td>1023309920</td>\n <td>None</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Ludwig Bendix</td>\n <td>Q15449424</td>\n <td>male</td>\n <td>None</td>\n <td>Germany</td>\n <td>Ludwig</td>\n <td>Bendix</td>\n <td>1877-06-28T00:00:00Z</td>\n <td>1954-01-03T00:00:00Z</td>\n <td>notary</td>\n <td>None</td>\n <td>74647579</td>\n <td>0000000081553379</td>\n <td>118702033</td>\n <td>None</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Arthur Nussbaum</td>\n <td>Q103088</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>United States of America</td>\n <td>Arthur</td>\n <td>Nussbaum</td>\n <td>1877-01-01T00:00:00Z</td>\n <td>1964-01-01T00:00:00Z</td>\n <td>university teacher</td>\n <td>international law</td>\n <td>5180962</td>\n <td>0000000120988288</td>\n <td>117071676</td>\n <td>https://en.wikipedia.org/wiki/Arthur%20Nussbaum</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Arthur Nussbaum</td>\n <td>Q94847354</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Arthur</td>\n <td>None</td>\n <td>1853-10-18T00:00:00Z</td>\n <td>1916-05-11T00:00:00Z</td>\n <td>journalist</td>\n <td>None</td>\n <td>308715560</td>\n <td>None</td>\n <td>1051211875</td>\n <td>None</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Theodor Geiger</td>\n <td>Q96410</td>\n <td>male</td>\n <td>None</td>\n <td>Denmark</td>\n <td>Theodor</td>\n <td>Geiger</td>\n <td>1891-11-09T00:00:00Z</td>\n <td>1952-06-16T00:00:00Z</td>\n <td>sociologist</td>\n <td>None</td>\n <td>56667946</td>\n <td>0000000109038951</td>\n <td>118538187</td>\n <td>https://en.wikipedia.org/wiki/Theodor%20Geiger</td>\n </tr>\n <tr>\n <th>20</th>\n <td>Erhard Blankenburg</td>\n <td>Q51595283</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Erhard</td>\n <td>Blankenburg</td>\n <td>1938-10-30T00:00:00Z</td>\n <td>2018-03-28T00:00:00Z</td>\n <td>sociologist</td>\n <td>sociology of law</td>\n <td>64109592</td>\n <td>0000000110676109</td>\n <td>115459235</td>\n <td>https://en.wikipedia.org/wiki/Erhard%20Blanken...</td>\n </tr>\n <tr>\n <th>21</th>\n <td>Wolfgang Kaupen</td>\n <td>Q93221485</td>\n <td>male</td>\n <td>None</td>\n <td>None</td>\n <td>Wolfgang</td>\n <td>None</td>\n <td>1936-01-01T00:00:00Z</td>\n <td>1981-01-01T00:00:00Z</td>\n <td>sociologist</td>\n <td>sociology of law</td>\n <td>32919813</td>\n <td>0000000035495614</td>\n <td>124045405</td>\n <td>None</td>\n </tr>\n <tr>\n <th>22</th>\n <td>Rüdiger Lautmann</td>\n <td>Q91074</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Rüdiger</td>\n <td>None</td>\n <td>1935-12-22T00:00:00Z</td>\n <td>None</td>\n <td>LGBTQI+ rights activist</td>\n <td>homosexuality</td>\n <td>24732961</td>\n <td>000000011469331X</td>\n <td>120502208</td>\n <td>https://en.wikipedia.org/wiki/R%C3%BCdiger%20L...</td>\n </tr>\n <tr>\n <th>23</th>\n <td>Thilo Ramm</td>\n <td>Q59533838</td>\n <td>male</td>\n <td>None</td>\n <td>Germany</td>\n <td>Thilo</td>\n <td>Ramm</td>\n <td>1925-04-04T00:00:00Z</td>\n <td>2018-06-17T00:00:00Z</td>\n <td>legal scholar</td>\n <td>None</td>\n <td>9924244</td>\n <td>0000000108689541</td>\n <td>116327391</td>\n <td>None</td>\n </tr>\n <tr>\n <th>24</th>\n <td>Rudolf Wiethölter</td>\n <td>Q1512482</td>\n <td>male</td>\n <td>None</td>\n <td>Germany</td>\n <td>Rudolf</td>\n <td>None</td>\n <td>1929-07-17T00:00:00Z</td>\n <td>None</td>\n <td>university teacher</td>\n <td>None</td>\n <td>106974404</td>\n <td>0000000116961365</td>\n <td>1034437860</td>\n <td>None</td>\n </tr>\n <tr>\n <th>25</th>\n <td>Niklas Luhmann</td>\n <td>Q57238</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Niklas</td>\n <td>Luhmann</td>\n <td>1927-12-08T00:00:00Z</td>\n <td>1998-11-06T00:00:00Z</td>\n <td>cyberneticist</td>\n <td>sociology</td>\n <td>29546145</td>\n <td>0000000122778532</td>\n <td>118575147</td>\n <td>https://en.wikipedia.org/wiki/Niklas%20Luhmann</td>\n </tr>\n <tr>\n <th>26</th>\n <td>Niklas Luhmann</td>\n <td>Q85691627</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>researcher</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>27</th>\n <td>Niklas Luhmann</td>\n <td>Q61803657</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>researcher</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n </tr>\n <tr>\n <th>28</th>\n <td>Gunther Teubner</td>\n <td>Q98304</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Gunther</td>\n <td>Teubner</td>\n <td>1944-04-30T00:00:00Z</td>\n <td>None</td>\n <td>jurist</td>\n <td>None</td>\n <td>108364502</td>\n <td>0000000109312017</td>\n <td>119443562</td>\n <td>https://en.wikipedia.org/wiki/Gunther%20Teubner</td>\n </tr>\n <tr>\n <th>29</th>\n <td>Volkmar Gessner</td>\n <td>Q15435946</td>\n <td>male</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Volkmar</td>\n <td>Gessner</td>\n <td>1937-10-09T00:00:00Z</td>\n <td>2014-11-08T00:00:00Z</td>\n <td>sociologist</td>\n <td>sociology of law</td>\n <td>69100039</td>\n <td>0000000109127065</td>\n <td>170469328</td>\n <td>https://en.wikipedia.org/wiki/Volkmar%20Gessner</td>\n </tr>\n <tr>\n <th>30</th>\n <td>Konstanze Plett</td>\n <td>Q95192683</td>\n <td>female</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>1947-01-01T00:00:00Z</td>\n <td>None</td>\n <td>jurist</td>\n <td>None</td>\n <td>20628376</td>\n <td>0000000066847723</td>\n <td>124957048</td>\n <td>None</td>\n </tr>\n <tr>\n <th>31</th>\n <td>Ute Sacksofsky</td>\n <td>Q48562036</td>\n <td>female</td>\n <td>None</td>\n <td>Germany</td>\n <td>Ute</td>\n <td>None</td>\n <td>1960-01-01T00:00:00Z</td>\n <td>None</td>\n <td>university teacher</td>\n <td>comparative law</td>\n <td>54395802</td>\n <td>0000000109029166</td>\n <td>132505746</td>\n <td>None</td>\n </tr>\n <tr>\n <th>32</th>\n <td>Susanne Baer</td>\n <td>Q95656754</td>\n <td>female</td>\n <td>None</td>\n <td>None</td>\n <td>Susanne</td>\n <td>Baer</td>\n <td>None</td>\n <td>None</td>\n <td>actor</td>\n <td>None</td>\n <td>189145911193727062107</td>\n <td>None</td>\n <td>1093595477</td>\n <td>None</td>\n </tr>\n <tr>\n <th>33</th>\n <td>Susanne Baer</td>\n <td>Q101872</td>\n <td>female</td>\n <td>http://commons.wikimedia.org/wiki/Special:File...</td>\n <td>Germany</td>\n <td>Susanne</td>\n <td>Baer</td>\n <td>1964-02-16T00:00:00Z</td>\n <td>None</td>\n <td>judge</td>\n <td>None</td>\n <td>262465472</td>\n <td>0000000030338599</td>\n <td>113854161</td>\n <td>https://en.wikipedia.org/wiki/Susanne%20Baer</td>\n </tr>\n </tbody>\n</table>\n</div>" - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Retrieving scholar data...\n" + ] + }, + { + "ename": "ConnectionError", + "evalue": "HTTPSConnectionPool(host='query.wikidata.org', port=443): Max retries exceeded with url: /sparql?query=%0ASELECT+DISTINCT+%3Fitem+%3FitemLabel%28SAMPLE%28%3FsexOrGender%29+AS+%3FsexOrGender%29%0A%28SAMPLE%28%3FcountryOfCitizenship%29+AS+%3FcountryOfCitizenship%29%0A%28SAMPLE%28%3FgivenName%29+AS+%3FgivenName%29%0A%28SAMPLE%28%3FfamilyName%29+AS+%3FfamilyName%29%0A%28SAMPLE%28%3FdateOfBirth%29+AS+%3FdateOfBirth%29%0A%28SAMPLE%28%3FdateOfDeath%29+AS+%3FdateOfDeath%29%0A%28SAMPLE%28%3Foccupation%29+AS+%3Foccupation%29%0A%28SAMPLE%28%3FfieldOfWork%29+AS+%3FfieldOfWork%29%0A%28SAMPLE%28%3Fviaf_id%29+AS+%3Fviaf_id%29%0A%28SAMPLE%28%3Fisni_id%29+AS+%3Fisni_id%29%0A%28SAMPLE%28%3Fgnd_id%29+AS+%3Fgnd_id%29+%0AWHERE+%7B%0A++++%3Fitem+wdt%3AP31+wd%3AQ5%3B+rdfs%3Alabel+%22Theodor+Geiger%22%40en+.%0A%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP21+%3FsexOrGenderId+.+%3FsexOrGenderId+rdfs%3Alabel+%3FsexOrGender+FILTER%28LANG%28%3FsexOrGender%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP27+%3FcountryOfCitizenshipId+.+%3FcountryOfCitizenshipId+rdfs%3Alabel+%3FcountryOfCitizenship+FILTER%28LANG%28%3FcountryOfCitizenship%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP735+%3FgivenNameId+.+%3FgivenNameId+rdfs%3Alabel+%3FgivenName+FILTER%28LANG%28%3FgivenName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP734+%3FfamilyNameId+.+%3FfamilyNameId+rdfs%3Alabel+%3FfamilyName+FILTER%28LANG%28%3FfamilyName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP569+%3FdateOfBirth.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP570+%3FdateOfDeath.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP106+%3FoccupationId+.+%3FoccupationId+rdfs%3Alabel+%3Foccupation+FILTER%28LANG%28%3Foccupation%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP101+%3FfieldOfWorkId+.+%3FfieldOfWorkId+rdfs%3Alabel+%3FfieldOfWork+FILTER%28LANG%28%3FfieldOfWork%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP214+%3Fviaf_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP213+%3Fisni_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP227+%3Fgnd_id.+%7D%0A%7D%0AGROUP+BY+%3Fitem+%3FitemLabel%0A&format=json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000027807EC3DD0>: Failed to establish a new connection: [WinError 10048] Normalerweise darf jede Socketadresse (Protokoll, Netzwerkadresse oder Anschluss) nur jeweils einmal verwendet werden'))", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mOSError\u001B[0m Traceback (most recent call last)", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connection.py:174\u001B[0m, in \u001B[0;36mHTTPConnection._new_conn\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 173\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 174\u001B[0m conn \u001B[38;5;241m=\u001B[39m connection\u001B[38;5;241m.\u001B[39mcreate_connection(\n\u001B[0;32m 175\u001B[0m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_dns_host, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mport), \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtimeout, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mextra_kw\n\u001B[0;32m 176\u001B[0m )\n\u001B[0;32m 178\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m SocketTimeout:\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:95\u001B[0m, in \u001B[0;36mcreate_connection\u001B[1;34m(address, timeout, source_address, socket_options)\u001B[0m\n\u001B[0;32m 94\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m err \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m---> 95\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m err\n\u001B[0;32m 97\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m socket\u001B[38;5;241m.\u001B[39merror(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgetaddrinfo returns an empty list\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\util\\connection.py:85\u001B[0m, in \u001B[0;36mcreate_connection\u001B[1;34m(address, timeout, source_address, socket_options)\u001B[0m\n\u001B[0;32m 84\u001B[0m sock\u001B[38;5;241m.\u001B[39mbind(source_address)\n\u001B[1;32m---> 85\u001B[0m sock\u001B[38;5;241m.\u001B[39mconnect(sa)\n\u001B[0;32m 86\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m sock\n", + "\u001B[1;31mOSError\u001B[0m: [WinError 10048] Normalerweise darf jede Socketadresse (Protokoll, Netzwerkadresse oder Anschluss) nur jeweils einmal verwendet werden", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[1;31mNewConnectionError\u001B[0m Traceback (most recent call last)", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:715\u001B[0m, in \u001B[0;36mHTTPConnectionPool.urlopen\u001B[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001B[0m\n\u001B[0;32m 714\u001B[0m \u001B[38;5;66;03m# Make the request on the httplib connection object.\u001B[39;00m\n\u001B[1;32m--> 715\u001B[0m httplib_response \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_make_request(\n\u001B[0;32m 716\u001B[0m conn,\n\u001B[0;32m 717\u001B[0m method,\n\u001B[0;32m 718\u001B[0m url,\n\u001B[0;32m 719\u001B[0m timeout\u001B[38;5;241m=\u001B[39mtimeout_obj,\n\u001B[0;32m 720\u001B[0m body\u001B[38;5;241m=\u001B[39mbody,\n\u001B[0;32m 721\u001B[0m headers\u001B[38;5;241m=\u001B[39mheaders,\n\u001B[0;32m 722\u001B[0m chunked\u001B[38;5;241m=\u001B[39mchunked,\n\u001B[0;32m 723\u001B[0m )\n\u001B[0;32m 725\u001B[0m \u001B[38;5;66;03m# If we're going to release the connection in ``finally:``, then\u001B[39;00m\n\u001B[0;32m 726\u001B[0m \u001B[38;5;66;03m# the response doesn't need to know about the connection. Otherwise\u001B[39;00m\n\u001B[0;32m 727\u001B[0m \u001B[38;5;66;03m# it will also try to release it and we'll have a double-release\u001B[39;00m\n\u001B[0;32m 728\u001B[0m \u001B[38;5;66;03m# mess.\u001B[39;00m\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:404\u001B[0m, in \u001B[0;36mHTTPConnectionPool._make_request\u001B[1;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001B[0m\n\u001B[0;32m 403\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 404\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_validate_conn(conn)\n\u001B[0;32m 405\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m (SocketTimeout, BaseSSLError) \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[0;32m 406\u001B[0m \u001B[38;5;66;03m# Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.\u001B[39;00m\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:1058\u001B[0m, in \u001B[0;36mHTTPSConnectionPool._validate_conn\u001B[1;34m(self, conn)\u001B[0m\n\u001B[0;32m 1057\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mgetattr\u001B[39m(conn, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124msock\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m): \u001B[38;5;66;03m# AppEngine might not have `.sock`\u001B[39;00m\n\u001B[1;32m-> 1058\u001B[0m conn\u001B[38;5;241m.\u001B[39mconnect()\n\u001B[0;32m 1060\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m conn\u001B[38;5;241m.\u001B[39mis_verified:\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connection.py:363\u001B[0m, in \u001B[0;36mHTTPSConnection.connect\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 361\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mconnect\u001B[39m(\u001B[38;5;28mself\u001B[39m):\n\u001B[0;32m 362\u001B[0m \u001B[38;5;66;03m# Add certificate verification\u001B[39;00m\n\u001B[1;32m--> 363\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msock \u001B[38;5;241m=\u001B[39m conn \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_new_conn()\n\u001B[0;32m 364\u001B[0m hostname \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mhost\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connection.py:186\u001B[0m, in \u001B[0;36mHTTPConnection._new_conn\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 185\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m SocketError \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[1;32m--> 186\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m NewConnectionError(\n\u001B[0;32m 187\u001B[0m \u001B[38;5;28mself\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mFailed to establish a new connection: \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m%\u001B[39m e\n\u001B[0;32m 188\u001B[0m )\n\u001B[0;32m 190\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m conn\n", + "\u001B[1;31mNewConnectionError\u001B[0m: <urllib3.connection.HTTPSConnection object at 0x0000027807EC3DD0>: Failed to establish a new connection: [WinError 10048] Normalerweise darf jede Socketadresse (Protokoll, Netzwerkadresse oder Anschluss) nur jeweils einmal verwendet werden", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[1;31mMaxRetryError\u001B[0m Traceback (most recent call last)", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\adapters.py:486\u001B[0m, in \u001B[0;36mHTTPAdapter.send\u001B[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001B[0m\n\u001B[0;32m 485\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 486\u001B[0m resp \u001B[38;5;241m=\u001B[39m conn\u001B[38;5;241m.\u001B[39murlopen(\n\u001B[0;32m 487\u001B[0m method\u001B[38;5;241m=\u001B[39mrequest\u001B[38;5;241m.\u001B[39mmethod,\n\u001B[0;32m 488\u001B[0m url\u001B[38;5;241m=\u001B[39murl,\n\u001B[0;32m 489\u001B[0m body\u001B[38;5;241m=\u001B[39mrequest\u001B[38;5;241m.\u001B[39mbody,\n\u001B[0;32m 490\u001B[0m headers\u001B[38;5;241m=\u001B[39mrequest\u001B[38;5;241m.\u001B[39mheaders,\n\u001B[0;32m 491\u001B[0m redirect\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m 492\u001B[0m assert_same_host\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m 493\u001B[0m preload_content\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m 494\u001B[0m decode_content\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m,\n\u001B[0;32m 495\u001B[0m retries\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmax_retries,\n\u001B[0;32m 496\u001B[0m timeout\u001B[38;5;241m=\u001B[39mtimeout,\n\u001B[0;32m 497\u001B[0m chunked\u001B[38;5;241m=\u001B[39mchunked,\n\u001B[0;32m 498\u001B[0m )\n\u001B[0;32m 500\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m (ProtocolError, \u001B[38;5;167;01mOSError\u001B[39;00m) \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\connectionpool.py:799\u001B[0m, in \u001B[0;36mHTTPConnectionPool.urlopen\u001B[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001B[0m\n\u001B[0;32m 797\u001B[0m e \u001B[38;5;241m=\u001B[39m ProtocolError(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mConnection aborted.\u001B[39m\u001B[38;5;124m\"\u001B[39m, e)\n\u001B[1;32m--> 799\u001B[0m retries \u001B[38;5;241m=\u001B[39m retries\u001B[38;5;241m.\u001B[39mincrement(\n\u001B[0;32m 800\u001B[0m method, url, error\u001B[38;5;241m=\u001B[39me, _pool\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m, _stacktrace\u001B[38;5;241m=\u001B[39msys\u001B[38;5;241m.\u001B[39mexc_info()[\u001B[38;5;241m2\u001B[39m]\n\u001B[0;32m 801\u001B[0m )\n\u001B[0;32m 802\u001B[0m retries\u001B[38;5;241m.\u001B[39msleep()\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\urllib3\\util\\retry.py:592\u001B[0m, in \u001B[0;36mRetry.increment\u001B[1;34m(self, method, url, response, error, _pool, _stacktrace)\u001B[0m\n\u001B[0;32m 591\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m new_retry\u001B[38;5;241m.\u001B[39mis_exhausted():\n\u001B[1;32m--> 592\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m MaxRetryError(_pool, url, error \u001B[38;5;129;01mor\u001B[39;00m ResponseError(cause))\n\u001B[0;32m 594\u001B[0m log\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mIncremented Retry for (url=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m): \u001B[39m\u001B[38;5;132;01m%r\u001B[39;00m\u001B[38;5;124m\"\u001B[39m, url, new_retry)\n", + "\u001B[1;31mMaxRetryError\u001B[0m: HTTPSConnectionPool(host='query.wikidata.org', port=443): Max retries exceeded with url: /sparql?query=%0ASELECT+DISTINCT+%3Fitem+%3FitemLabel%28SAMPLE%28%3FsexOrGender%29+AS+%3FsexOrGender%29%0A%28SAMPLE%28%3FcountryOfCitizenship%29+AS+%3FcountryOfCitizenship%29%0A%28SAMPLE%28%3FgivenName%29+AS+%3FgivenName%29%0A%28SAMPLE%28%3FfamilyName%29+AS+%3FfamilyName%29%0A%28SAMPLE%28%3FdateOfBirth%29+AS+%3FdateOfBirth%29%0A%28SAMPLE%28%3FdateOfDeath%29+AS+%3FdateOfDeath%29%0A%28SAMPLE%28%3Foccupation%29+AS+%3Foccupation%29%0A%28SAMPLE%28%3FfieldOfWork%29+AS+%3FfieldOfWork%29%0A%28SAMPLE%28%3Fviaf_id%29+AS+%3Fviaf_id%29%0A%28SAMPLE%28%3Fisni_id%29+AS+%3Fisni_id%29%0A%28SAMPLE%28%3Fgnd_id%29+AS+%3Fgnd_id%29+%0AWHERE+%7B%0A++++%3Fitem+wdt%3AP31+wd%3AQ5%3B+rdfs%3Alabel+%22Theodor+Geiger%22%40en+.%0A%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP21+%3FsexOrGenderId+.+%3FsexOrGenderId+rdfs%3Alabel+%3FsexOrGender+FILTER%28LANG%28%3FsexOrGender%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP27+%3FcountryOfCitizenshipId+.+%3FcountryOfCitizenshipId+rdfs%3Alabel+%3FcountryOfCitizenship+FILTER%28LANG%28%3FcountryOfCitizenship%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP735+%3FgivenNameId+.+%3FgivenNameId+rdfs%3Alabel+%3FgivenName+FILTER%28LANG%28%3FgivenName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP734+%3FfamilyNameId+.+%3FfamilyNameId+rdfs%3Alabel+%3FfamilyName+FILTER%28LANG%28%3FfamilyName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP569+%3FdateOfBirth.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP570+%3FdateOfDeath.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP106+%3FoccupationId+.+%3FoccupationId+rdfs%3Alabel+%3Foccupation+FILTER%28LANG%28%3Foccupation%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP101+%3FfieldOfWorkId+.+%3FfieldOfWorkId+rdfs%3Alabel+%3FfieldOfWork+FILTER%28LANG%28%3FfieldOfWork%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP214+%3Fviaf_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP213+%3Fisni_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP227+%3Fgnd_id.+%7D%0A%7D%0AGROUP+BY+%3Fitem+%3FitemLabel%0A&format=json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000027807EC3DD0>: Failed to establish a new connection: [WinError 10048] Normalerweise darf jede Socketadresse (Protokoll, Netzwerkadresse oder Anschluss) nur jeweils einmal verwendet werden'))", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[1;31mConnectionError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[22], line 41\u001B[0m\n\u001B[0;32m 2\u001B[0m property_labels_to_ids \u001B[38;5;241m=\u001B[39m {\n\u001B[0;32m 3\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124msexOrGender\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mP21\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[0;32m 4\u001B[0m \u001B[38;5;66;03m# 'image': 'P18',\u001B[39;00m\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 14\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mgnd_id\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mP227\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m 15\u001B[0m }\n\u001B[0;32m 17\u001B[0m scholars \u001B[38;5;241m=\u001B[39m [\n\u001B[0;32m 18\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mHans Kelsen\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m 19\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mHugo Sinzheimer\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 39\u001B[0m (\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mSusanne Baer\u001B[39m\u001B[38;5;124m\"\u001B[39m,\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mQ101872\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 40\u001B[0m ]\n\u001B[1;32m---> 41\u001B[0m df \u001B[38;5;241m=\u001B[39m get_person_info_from_wikidata(scholars, property_labels_to_ids)\n\u001B[0;32m 42\u001B[0m df\n", + "Cell \u001B[1;32mIn[20], line 133\u001B[0m, in \u001B[0;36mget_person_info_from_wikidata\u001B[1;34m(names, property_map, languages)\u001B[0m\n\u001B[0;32m 131\u001B[0m results \u001B[38;5;241m=\u001B[39m query_wikidata(item[\u001B[38;5;241m0\u001B[39m], property_map, languages[\u001B[38;5;241m0\u001B[39m], qid\u001B[38;5;241m=\u001B[39mitem[\u001B[38;5;241m1\u001B[39m])\n\u001B[0;32m 132\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 133\u001B[0m results \u001B[38;5;241m=\u001B[39m query_wikidata(item, property_map, languages[\u001B[38;5;241m0\u001B[39m])\n\u001B[0;32m 135\u001B[0m all_data \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m results\n\u001B[0;32m 136\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(all_data) \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m0\u001B[39m:\n\u001B[0;32m 137\u001B[0m \u001B[38;5;66;03m# Ensure fullName appears first by reordering columns based on property_labels_to_ids keys\u001B[39;00m\n", + "Cell \u001B[1;32mIn[20], line 57\u001B[0m, in \u001B[0;36mquery_wikidata\u001B[1;34m(fullName, property_map, language, qid)\u001B[0m\n\u001B[0;32m 55\u001B[0m query \u001B[38;5;241m=\u001B[39m generate_sparql_query(fullName, property_map, language, qid\u001B[38;5;241m=\u001B[39mqid)\n\u001B[0;32m 56\u001B[0m headers \u001B[38;5;241m=\u001B[39m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mUser-Agent\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mMozilla/5.0\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mAccept\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mapplication/json\u001B[39m\u001B[38;5;124m'\u001B[39m}\n\u001B[1;32m---> 57\u001B[0m response \u001B[38;5;241m=\u001B[39m requests\u001B[38;5;241m.\u001B[39mget(SPARQL_ENDPOINT, headers\u001B[38;5;241m=\u001B[39mheaders, params\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mquery\u001B[39m\u001B[38;5;124m'\u001B[39m: query, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mformat\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mjson\u001B[39m\u001B[38;5;124m'\u001B[39m})\n\u001B[0;32m 59\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m response\u001B[38;5;241m.\u001B[39mstatus_code \u001B[38;5;241m!=\u001B[39m \u001B[38;5;241m200\u001B[39m:\n\u001B[0;32m 60\u001B[0m \u001B[38;5;28mprint\u001B[39m(query)\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\api.py:73\u001B[0m, in \u001B[0;36mget\u001B[1;34m(url, params, **kwargs)\u001B[0m\n\u001B[0;32m 62\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget\u001B[39m(url, params\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m 63\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124mr\u001B[39m\u001B[38;5;124;03m\"\"\"Sends a GET request.\u001B[39;00m\n\u001B[0;32m 64\u001B[0m \n\u001B[0;32m 65\u001B[0m \u001B[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001B[39;00m\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 70\u001B[0m \u001B[38;5;124;03m :rtype: requests.Response\u001B[39;00m\n\u001B[0;32m 71\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m---> 73\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m request(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mget\u001B[39m\u001B[38;5;124m\"\u001B[39m, url, params\u001B[38;5;241m=\u001B[39mparams, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\api.py:59\u001B[0m, in \u001B[0;36mrequest\u001B[1;34m(method, url, **kwargs)\u001B[0m\n\u001B[0;32m 55\u001B[0m \u001B[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001B[39;00m\n\u001B[0;32m 56\u001B[0m \u001B[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001B[39;00m\n\u001B[0;32m 57\u001B[0m \u001B[38;5;66;03m# cases, and look like a memory leak in others.\u001B[39;00m\n\u001B[0;32m 58\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m sessions\u001B[38;5;241m.\u001B[39mSession() \u001B[38;5;28;01mas\u001B[39;00m session:\n\u001B[1;32m---> 59\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m session\u001B[38;5;241m.\u001B[39mrequest(method\u001B[38;5;241m=\u001B[39mmethod, url\u001B[38;5;241m=\u001B[39murl, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\sessions.py:589\u001B[0m, in \u001B[0;36mSession.request\u001B[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001B[0m\n\u001B[0;32m 584\u001B[0m send_kwargs \u001B[38;5;241m=\u001B[39m {\n\u001B[0;32m 585\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtimeout\u001B[39m\u001B[38;5;124m\"\u001B[39m: timeout,\n\u001B[0;32m 586\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mallow_redirects\u001B[39m\u001B[38;5;124m\"\u001B[39m: allow_redirects,\n\u001B[0;32m 587\u001B[0m }\n\u001B[0;32m 588\u001B[0m send_kwargs\u001B[38;5;241m.\u001B[39mupdate(settings)\n\u001B[1;32m--> 589\u001B[0m resp \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39msend(prep, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39msend_kwargs)\n\u001B[0;32m 591\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m resp\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\sessions.py:703\u001B[0m, in \u001B[0;36mSession.send\u001B[1;34m(self, request, **kwargs)\u001B[0m\n\u001B[0;32m 700\u001B[0m start \u001B[38;5;241m=\u001B[39m preferred_clock()\n\u001B[0;32m 702\u001B[0m \u001B[38;5;66;03m# Send the request\u001B[39;00m\n\u001B[1;32m--> 703\u001B[0m r \u001B[38;5;241m=\u001B[39m adapter\u001B[38;5;241m.\u001B[39msend(request, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 705\u001B[0m \u001B[38;5;66;03m# Total elapsed time of the request (approximately)\u001B[39;00m\n\u001B[0;32m 706\u001B[0m elapsed \u001B[38;5;241m=\u001B[39m preferred_clock() \u001B[38;5;241m-\u001B[39m start\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\adapters.py:519\u001B[0m, in \u001B[0;36mHTTPAdapter.send\u001B[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001B[0m\n\u001B[0;32m 515\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(e\u001B[38;5;241m.\u001B[39mreason, _SSLError):\n\u001B[0;32m 516\u001B[0m \u001B[38;5;66;03m# This branch is for urllib3 v1.22 and later.\u001B[39;00m\n\u001B[0;32m 517\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m SSLError(e, request\u001B[38;5;241m=\u001B[39mrequest)\n\u001B[1;32m--> 519\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mConnectionError\u001B[39;00m(e, request\u001B[38;5;241m=\u001B[39mrequest)\n\u001B[0;32m 521\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m ClosedPoolError \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[0;32m 522\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mConnectionError\u001B[39;00m(e, request\u001B[38;5;241m=\u001B[39mrequest)\n", + "\u001B[1;31mConnectionError\u001B[0m: HTTPSConnectionPool(host='query.wikidata.org', port=443): Max retries exceeded with url: /sparql?query=%0ASELECT+DISTINCT+%3Fitem+%3FitemLabel%28SAMPLE%28%3FsexOrGender%29+AS+%3FsexOrGender%29%0A%28SAMPLE%28%3FcountryOfCitizenship%29+AS+%3FcountryOfCitizenship%29%0A%28SAMPLE%28%3FgivenName%29+AS+%3FgivenName%29%0A%28SAMPLE%28%3FfamilyName%29+AS+%3FfamilyName%29%0A%28SAMPLE%28%3FdateOfBirth%29+AS+%3FdateOfBirth%29%0A%28SAMPLE%28%3FdateOfDeath%29+AS+%3FdateOfDeath%29%0A%28SAMPLE%28%3Foccupation%29+AS+%3Foccupation%29%0A%28SAMPLE%28%3FfieldOfWork%29+AS+%3FfieldOfWork%29%0A%28SAMPLE%28%3Fviaf_id%29+AS+%3Fviaf_id%29%0A%28SAMPLE%28%3Fisni_id%29+AS+%3Fisni_id%29%0A%28SAMPLE%28%3Fgnd_id%29+AS+%3Fgnd_id%29+%0AWHERE+%7B%0A++++%3Fitem+wdt%3AP31+wd%3AQ5%3B+rdfs%3Alabel+%22Theodor+Geiger%22%40en+.%0A%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP21+%3FsexOrGenderId+.+%3FsexOrGenderId+rdfs%3Alabel+%3FsexOrGender+FILTER%28LANG%28%3FsexOrGender%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP27+%3FcountryOfCitizenshipId+.+%3FcountryOfCitizenshipId+rdfs%3Alabel+%3FcountryOfCitizenship+FILTER%28LANG%28%3FcountryOfCitizenship%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP735+%3FgivenNameId+.+%3FgivenNameId+rdfs%3Alabel+%3FgivenName+FILTER%28LANG%28%3FgivenName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP734+%3FfamilyNameId+.+%3FfamilyNameId+rdfs%3Alabel+%3FfamilyName+FILTER%28LANG%28%3FfamilyName%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP569+%3FdateOfBirth.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP570+%3FdateOfDeath.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP106+%3FoccupationId+.+%3FoccupationId+rdfs%3Alabel+%3Foccupation+FILTER%28LANG%28%3Foccupation%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP101+%3FfieldOfWorkId+.+%3FfieldOfWorkId+rdfs%3Alabel+%3FfieldOfWork+FILTER%28LANG%28%3FfieldOfWork%29+%3D+%22en%22%29+.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP214+%3Fviaf_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP213+%3Fisni_id.+%7D%0A++++++++++++OPTIONAL+%7B+%3Fitem+wdt%3AP227+%3Fgnd_id.+%7D%0A%7D%0AGROUP+BY+%3Fitem+%3FitemLabel%0A&format=json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000027807EC3DD0>: Failed to establish a new connection: [WinError 10048] Normalerweise darf jede Socketadresse (Protokoll, Netzwerkadresse oder Anschluss) nur jeweils einmal verwendet werden'))" + ] } ], "source": [ @@ -180,26 +232,26 @@ "scholars = [\n", " \"Hans Kelsen\",\n", " \"Hugo Sinzheimer\",\n", - " \"Karl Renner\",\n", - " \"Ernst Fraenkel\",\n", - " \"Franz Leopold Neumann\",\n", + " (\"Karl Renner\",\"Q11726\"),\n", + " (\"Ernst Fraenkel\", \"Q86812\"),\n", + " (\"Franz Leopold Neumann\", \"Q63195\"),\n", " \"Otto Kahn-Freund\",\n", " \"Otto Kirchheimer\",\n", " \"Herrmann Kantorowicz\",\n", - " \"Ludwig Bendix\",\n", - " \"Arthur Nussbaum\",\n", + " (\"Ludwig Bendix\", \"Q28053205\"),\n", + " (\"Arthur Nussbaum\", \"Q103088\"),\n", " \"Theodor Geiger\",\n", " \"Erhard Blankenburg\",\n", " \"Wolfgang Kaupen\",\n", " \"Rüdiger Lautmann\",\n", " \"Thilo Ramm\",\n", " \"Rudolf Wiethölter\",\n", - " \"Niklas Luhmann\",\n", + " (\"Niklas Luhmann\",\"Q57238\"),\n", " \"Gunther Teubner\",\n", " \"Volkmar Gessner\",\n", " \"Konstanze Plett\",\n", " \"Ute Sacksofsky\",\n", - " \"Susanne Baer\"\n", + " (\"Susanne Baer\",\"Q101872\")\n", "]\n", "df = get_person_info_from_wikidata(scholars, property_labels_to_ids)\n", "df" @@ -207,8 +259,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-03-10T21:46:22.256487300Z", - "start_time": "2024-03-10T21:45:35.629676Z" + "end_time": "2024-03-11T17:39:31.164638600Z", + "start_time": "2024-03-11T17:39:24.677965500Z" } }, "id": "19ddabbda261cc90" @@ -218,7 +270,7 @@ "execution_count": null, "outputs": [], "source": [ - "from lib.dnb_data import " + "from lib.dnb_data import get_publications" ], "metadata": { "collapsed": false @@ -227,16 +279,28 @@ }, { "cell_type": "code", - "execution_count": 21, - "outputs": [], + "execution_count": 3, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'df' is not defined", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mNameError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[3], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m df\u001B[38;5;241m.\u001B[39mto_csv(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mscholars.csv\u001B[39m\u001B[38;5;124m\"\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n", + "\u001B[1;31mNameError\u001B[0m: name 'df' is not defined" + ] + } + ], "source": [ "df.to_csv(\"scholars.csv\", index=False)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-03-10T21:46:57.602283400Z", - "start_time": "2024-03-10T21:46:57.590155600Z" + "end_time": "2024-03-11T13:10:04.016364200Z", + "start_time": "2024-03-11T13:10:02.070023900Z" } }, "id": "c6c0cc347c8788d0" -- GitLab