Skip to content
Snippets Groups Projects
Commit c7207861 authored by cboulanger's avatar cboulanger
Browse files

Fix output, add UK scholars

parent d65c3ae2
No related branches found
No related tags found
No related merge requests found
# based on code written by GPT-4
import csv
import os
import mwclient
import os.path
import requests
import pandas as pd
import textwrap
import re
from pywikibot import Claim, WbTime, ItemPage, PropertyPage, Site
from datetime import datetime
import mwclient
from dotenv import load_dotenv
load_dotenv()
def _escape_sparql_literal(text):
    """Escape *text* so it can sit inside a double-quoted SPARQL string literal."""
    return (str(text)
            .replace("\\", "\\\\")  # backslashes first, so later escapes are not doubled
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r"))


def generate_sparql_query(fullName, property_labels_to_ids, language='en', qid=None, include_description=False):
    """
    Build a SPARQL query for the properties of a person listed in the given property map,
    looked up either by label (fullName) or by QID. When a QID is given, the item is bound
    directly and fullName is not used in the query.

    How each property is selected depends on its label:
      - labels ending in "Name": all values are concatenated (space-separated), using the
        value's label in `language` when it has one and the raw value otherwise
      - labels ending in "_id" or starting with "image"/"date": the raw value is sampled
      - any other label: the label (in `language`) of the linked item is sampled

    :param fullName: Name of the person to query (matched against rdfs:label in `language`)
    :param property_labels_to_ids: Dictionary mapping property labels to WikiData property IDs.
        Labels become SPARQL variable names and are interpolated verbatim.
    :param language: Language code for the query results
    :param qid: WikiData entity ID (QID) for the person; interpolated verbatim
    :param include_description: If true, also select the item description as ?itemdesc
    :return: SPARQL query string
    """
    select_lines = ["SELECT DISTINCT ?item"]
    where_lines = []
    group_by = "GROUP BY ?item"

    if qid:
        where_lines.append(f"BIND(wd:{qid} AS ?item).")
    else:
        # The name is free text: escape it so quotes/backslashes cannot break
        # out of the string literal and corrupt the query.
        name_literal = _escape_sparql_literal(fullName)
        where_lines.append(f'?item wdt:P31 wd:Q5; rdfs:label "{name_literal}"@{language} .')

    if include_description:
        select_lines[0] += " ?itemdesc"
        where_lines.append(
            f'OPTIONAL {{ ?item schema:description ?itemdesc. FILTER(LANG(?itemdesc) = "{language}") }}'
        )
        group_by += " ?itemdesc"

    for label, pid in property_labels_to_ids.items():
        # add to property selection
        if label.endswith('Name'):
            select_lines.append(f'(GROUP_CONCAT(DISTINCT ?{label}Value; separator=" ") AS ?{label})')
        else:
            select_lines.append(f'(SAMPLE(?{label}) AS ?{label})')

        # add to item constraint
        if label.endswith("_id") or label.startswith("image") or label.startswith("date"):
            where_lines.append(f"OPTIONAL {{ ?item wdt:{pid} ?{label}. }}")
        elif label.endswith("Name"):
            # ?order (series ordinal qualifier) is bound here but not read anywhere else in the query
            where_lines.append("\n".join([
                "OPTIONAL {",
                f"  ?item p:{pid} ?{label}Statement.",
                f"  ?{label}Statement ps:{pid} ?{label}.",
                f"  OPTIONAL {{ ?{label}Statement pq:P1545 ?order. }}",
                "  OPTIONAL {",
                f"    ?{label} rdfs:label ?{label}Label.",
                f'    FILTER(LANG(?{label}Label) = "{language}")',
                "  }",
                f"  BIND(COALESCE(?{label}Label, STR(?{label})) AS ?{label}Value)",
                "}",
            ]))
        else:
            where_lines.append(
                f'OPTIONAL {{ ?item wdt:{pid} ?{label}Id . ?{label}Id rdfs:label ?{label} '
                f'FILTER(LANG(?{label}) = "{language}") . }}'
            )

    return "\n".join([
        "\n".join(select_lines),
        "WHERE {",
        "\n".join(where_lines),
        "}",
        group_by,
    ]) + "\n"
def query_wikidata(fullName, property_map, language='en', qid=None, debug=False, include_description=False):
    """
    Run the person query against the Wikidata SPARQL endpoint and flatten the result.

    :param fullName: Name of the person; also stored as 'fullName' in every returned row
    :param property_map: Dictionary mapping property labels to WikiData property IDs
    :param language: Language code passed on to the query
    :param qid: Optional WikiData entity ID; passed on to the query builder
    :param debug: If true, print the generated query before sending it
    :param include_description: If true, also return the item description as 'itemdesc'
    :return: A list with one dict per result binding, keyed by 'fullName', every property
        label (None when the binding has no value for it), 'qid' and 'wikidata_url'.
        Empty list when nothing matched.
    :raises requests.HTTPError: if the endpoint answers with an HTTP error status
    :raises requests.Timeout: if the endpoint does not answer in time
    """
    SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
    query = generate_sparql_query(fullName, property_map, language, qid=qid, include_description=include_description)
    if debug:
        print(query)
    headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(SPARQL_ENDPOINT,
                            headers=headers,
                            params={'query': query, 'format': 'json'},
                            timeout=90)
    if response.status_code != 200:
        # show the offending query to make failures debuggable
        print(query)
        response.raise_for_status()

    bindings = response.json()['results']['bindings']

    # The set of expected columns is the same for every row, so compute it once.
    labels = list(property_map.keys())
    if include_description:
        labels.append('itemdesc')

    rows = []
    for binding in bindings:
        # Initialize with fullName to ensure it appears first
        row = {'fullName': fullName}
        for label in labels:
            row[label] = binding[label]['value'] if label in binding else None
        # add qid (last path segment of the item URI) and the item URI itself
        item_uri = binding['item']['value']
        row['qid'] = os.path.basename(item_uri)
        row['wikidata_url'] = item_uri
        rows.append(row)
    return rows
def get_wikipedia_links(qid, languages):
    """
    Fetch Wikipedia links for a given Wikidata QID and a list of languages.

    Parameters:
    - qid (str): The QID of the Wikidata item.
    - languages (list): A list of language codes (e.g., ['en', 'de']).

    Returns:
    - dict: A dictionary with languages as keys and Wikipedia URLs as values
      (None for a language without a sitelink). If the API response does not
      contain the entity at all, the dictionary is empty.

    Raises:
    - requests.Timeout: if the Wikidata API does not answer in time.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": qid,
        "props": "sitelinks/urls",
        "format": "json"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=30)
    data = response.json()

    links = {}
    if "entities" in data and qid in data["entities"]:
        sitelinks = data["entities"][qid].get("sitelinks", {})
        for lang in languages:
            sitekey = f"{lang}wiki"
            if sitekey not in sitelinks:
                links[lang] = None  # Or use '' to represent absence of link
                continue
            sitelink = sitelinks[sitekey]
            if 'url' in sitelink:
                links[lang] = sitelink.get('url')
            else:
                # Use the 'title' key and construct the URL manually
                title = sitelink["title"]
                links[lang] = f"https://{lang}.wikipedia.org/wiki/{requests.utils.quote(title)}"
    return links
def extract_name_qid_with_regex(strings):
    """
    Split strings of the form "Name (QID)" into (name, qid) tuples.

    The QID part is optional and may be written with or without parentheses;
    when it is absent the tuple holds None in its place. Strings that are
    empty after stripping whitespace are left out of the result.
    """
    entry_re = re.compile(r'^(.+?)(?: \(?Q(\d+)\)?)? *$')
    pairs = []
    for raw in strings:
        found = entry_re.match(raw.strip())
        if found is None:
            continue
        label, digits = found.groups()
        pairs.append((label.strip(), 'Q' + digits if digits else None))
    return pairs
def get_person_info_from_wikidata(names: list,
                                  property_map: dict,
                                  languages: list = None,
                                  debug=False,
                                  include_description=False) -> pd.DataFrame:
    """
    Look up a list of persons on Wikidata and return their properties plus Wikipedia page links.

    Args:
        names:
            a list of strings in the format "Name (QID)". "(QID)" is optional. If left out, the
            result will contain all items having that name
        property_map:
            a dict mapping names of the property to PIDs
        languages:
            a list of languages for which to retrieve the wikipedia page URL, if it exists.
            Defaults to ['en', 'de']; the first entry is also used as the query language
        debug:
            if true, output debug information
        include_description:
            if true, add an 'itemdesc' column with the item description

    Returns:
        A dataframe with the columns 'fullName', 'qid', optionally 'itemdesc', one column per
        property name, 'wikidata_url' and one 'wikipedia_<language>' column per language
    """
    if languages is None:
        languages = ['en', 'de']
    query_language = languages[0]

    print('Retrieving scholar data...')
    records = []
    for person_name, person_qid in extract_name_qid_with_regex(names):
        records.extend(query_wikidata(person_name,
                                      property_map=property_map,
                                      language=query_language,
                                      qid=person_qid,
                                      include_description=include_description,
                                      debug=debug))

    # Fixed column order: identity first, then the properties, then the links
    columns = ['fullName', 'qid']
    if include_description:
        columns.append('itemdesc')
    columns.extend(property_map.keys())
    columns.append('wikidata_url')
    columns.extend(f'wikipedia_{code}' for code in languages)

    if len(records) == 0:
        return pd.DataFrame(columns=columns, dtype=str)

    df = pd.DataFrame(records, columns=columns, dtype=str)
    print("Retrieving wikipedia URLs...")
    # One lookup per row; the links are written straight into that row's wikipedia_* cells
    for index, row in df.iterrows():
        links = get_wikipedia_links(row['qid'], languages)
        for code in languages:
            df.at[index, f'wikipedia_{code}'] = links.get(code, None)
    return df
def claim_to_string(claim):
subject_qid = claim.on_item.id
predicate_pid = claim.getID()
......@@ -187,4 +415,34 @@ def get_wikipedia_page_data(pageTitle: str, language="en"):
'revision': page.revision,
'url': f'{site.host}/wiki/{pageTitle.replace(" ", "_")}?oldid={page.revision}',
'content': page.text()
}
\ No newline at end of file
}
# Function to convert URLs to HTML links
def make_clickable(val, name):
    """
    Wrap a URL in an HTML link that opens in a new tab.

    :param val: the URL; None, NaN/NA or an empty string mean "no link"
    :param name: the link text
    :return: the '<a ...>' markup, or an empty string when there is no URL
    """
    # Check for missing values first: NaN is truthy (it would render as href="nan")
    # and pd.NA cannot be truth-tested at all.
    if pd.isna(val) or not val:
        return ""
    return f'<a target="_blank" href="{val}">{name}</a>'
def format_name(name, date_birth, date_death):
    """
    Return the name followed by the life span as "(birth year-death year)".

    A date that is missing according to pd.isna leaves its side of the dash
    empty; otherwise the value's `.year` attribute is used.
    """
    born = "" if pd.isna(date_birth) else date_birth.year
    died = "" if pd.isna(date_death) else date_death.year
    return f'{name} ({born}-{died})'
def format_language_codes(name):
    """
    Turn language suffixes such as "_en" or "_de" into " (EN)" / " (DE)".

    Each match covers the text in front of the underscore as well, so that
    text is dropped from the result; anything after the two-letter code stays.
    """
    def to_tag(match):
        code = match.group(2)
        return f" ({code.upper()})"

    return re.sub(r'(.*?)_([a-z]{2})', to_tag, name)
def create_styled_table(df: pd.DataFrame, include_rows:list):
    """
    Build a display version of the scholar table without modifying the input.

    'fullName' is extended with the life span taken from the 'dateOfBirth' and
    'dateOfDeath' columns, the frame is reduced to the requested columns, and every
    column whose name starts with 'wiki' is turned into HTML links.

    :param df: dataframe with at least 'fullName', 'dateOfBirth' and 'dateOfDeath' columns
    :param include_rows: names of the columns (despite the parameter name) to keep, in order
    :return: a new dataframe containing only the requested columns
    """
    df = df.copy()
    df['fullName'] = df.apply(lambda r: format_name(r['fullName'], r['dateOfBirth'], r['dateOfDeath']), axis=1)
    # Take an explicit copy of the selection: assigning into a bare df[...] slice
    # is the chained-assignment pattern pandas warns about.
    df = df[include_rows].copy()
    for col in df.columns:
        if not col.startswith('wiki'):
            continue
        if col.startswith('wikipedia'):
            link_name = 'WP ' + format_language_codes(col)
        else:
            link_name = "Wikidata"
        # replace the column outright rather than writing into the existing one
        df[col] = df[col].apply(make_clickable, name=link_name)
    return df
fullName,qid,itemdesc,sexOrGender,familyName,givenName,dateOfBirth,dateOfDeath,wikidata_url,wikipedia_en,wikipedia_de
Karl Renner,Q11726,first President of Austria (1870–1950),male,Renner,Karl,1870-12-14 00:00:00+00:00,1950-12-31 00:00:00+00:00,http://www.wikidata.org/entity/Q11726,https://en.wikipedia.org/wiki/Karl_Renner,https://de.wikipedia.org/wiki/Karl_Renner
Hugo Sinzheimer,Q86043,German politician (1875-1945),male,Sinzheimer,Hugo D.,1875-01-01 00:00:00+00:00,1945-09-16 00:00:00+00:00,http://www.wikidata.org/entity/Q86043,https://en.wikipedia.org/wiki/Hugo_Sinzheimer,https://de.wikipedia.org/wiki/Hugo_Sinzheimer
Arthur Nussbaum,Q103088,German American jurist,male,Nussbaum,Arthur,1877-01-01 00:00:00+00:00,1964-01-01 00:00:00+00:00,http://www.wikidata.org/entity/Q103088,https://en.wikipedia.org/wiki/Arthur_Nussbaum,https://de.wikipedia.org/wiki/Arthur_Nussbaum
Ludwig Bendix,Q15449424,"German economist, civil law notary and lawyer (1877–1954)",male,Bendix,Ludwig,1877-06-28 00:00:00+00:00,1954-01-03 00:00:00+00:00,http://www.wikidata.org/entity/Q15449424,,https://de.wikipedia.org/wiki/Ludwig_Bendix
Hans Kelsen,Q84165,Austrian lawyer,male,Kelsen,Hans,1881-10-11 00:00:00+00:00,1973-04-19 00:00:00+00:00,http://www.wikidata.org/entity/Q84165,https://en.wikipedia.org/wiki/Hans_Kelsen,https://de.wikipedia.org/wiki/Hans_Kelsen
Theodor Geiger,Q96410,German sociologist (1891-1952),male,Geiger,Theodor,1891-11-09 00:00:00+00:00,1952-06-16 00:00:00+00:00,http://www.wikidata.org/entity/Q96410,https://en.wikipedia.org/wiki/Theodor_Geiger,https://de.wikipedia.org/wiki/Theodor_Geiger
Ernst Fraenkel,Q86812,political scientist (1898-1975),male,Fraenkel,Ernst,1898-12-26 00:00:00+00:00,1975-03-28 00:00:00+00:00,http://www.wikidata.org/entity/Q86812,https://en.wikipedia.org/wiki/Ernst_Fraenkel_(political_scientist),https://de.wikipedia.org/wiki/Ernst_Fraenkel_(Politikwissenschaftler)
Franz Leopold Neumann,Q63195,German political activist,male,Neumann,Leopold Franz,1900-05-23 00:00:00+00:00,1954-09-02 00:00:00+00:00,http://www.wikidata.org/entity/Q63195,https://en.wikipedia.org/wiki/Franz_Neumann_(political_scientist),https://de.wikipedia.org/wiki/Franz_Neumann_(Politikwissenschaftler)
Otto Kahn-Freund,Q121832,German-British jurist,male,Kahn Freund,Otto,1900-11-17 00:00:00+00:00,1979-06-16 00:00:00+00:00,http://www.wikidata.org/entity/Q121832,https://en.wikipedia.org/wiki/Otto_Kahn-Freund,https://de.wikipedia.org/wiki/Otto_Kahn-Freund
Ernst Eduard Hirsch,Q107033,German judge (1902-1985),male,Hirsch,Ernst,1902-01-20 00:00:00+00:00,1985-03-29 00:00:00+00:00,http://www.wikidata.org/entity/Q107033,,https://de.wikipedia.org/wiki/Ernst_Eduard_Hirsch
Otto Kirchheimer,Q214397,German-American legal scholar,male,Kirchheimer,Otto,1905-11-11 00:00:00+00:00,1965-11-22 00:00:00+00:00,http://www.wikidata.org/entity/Q214397,https://en.wikipedia.org/wiki/Otto_Kirchheimer,https://de.wikipedia.org/wiki/Otto_Kirchheimer
Helmut Schelsky,Q104272,German sociologist (1912-1984),male,,Helmut,1912-10-14 00:00:00+00:00,1984-02-24 00:00:00+00:00,http://www.wikidata.org/entity/Q104272,https://en.wikipedia.org/wiki/Helmut_Schelsky,https://de.wikipedia.org/wiki/Helmut_Schelsky
Hans Ryffel,Q21035905,(1913-1989),male,Ryffel,Hans,1913-06-27 00:00:00+00:00,1989-09-30 00:00:00+00:00,http://www.wikidata.org/entity/Q21035905,,https://de.wikipedia.org/wiki/Hans_Ryffel_(Rechtsphilosoph)
Theo Rasehorn,Q1304659,German judge and author,male,,Theo,1918-10-26 00:00:00+00:00,2016-01-16 00:00:00+00:00,http://www.wikidata.org/entity/Q1304659,,https://de.wikipedia.org/wiki/Theo_Rasehorn
Rudolf Wassermann,Q1551290,German judge (1925-2008),male,Wassermann,Rudolf,1925-01-05 00:00:00+00:00,2008-06-13 00:00:00+00:00,http://www.wikidata.org/entity/Q1551290,,https://de.wikipedia.org/wiki/Rudolf_Wassermann
Thilo Ramm,Q59533838,German legal scholar and author,male,Ramm,Thilo,1925-04-04 00:00:00+00:00,2018-06-17 00:00:00+00:00,http://www.wikidata.org/entity/Q59533838,,https://de.wikipedia.org/wiki/Thilo_Ramm
Niklas Luhmann,Q57238,"German sociologist, administration expert, and social systems theorist (1927-1998)",male,Luhmann,Niklas,1927-12-08 00:00:00+00:00,1998-11-06 00:00:00+00:00,http://www.wikidata.org/entity/Q57238,https://en.wikipedia.org/wiki/Niklas_Luhmann,https://de.wikipedia.org/wiki/Niklas_Luhmann
Rudolf Wiethölter,Q1512482,German jurist,male,,Rudolf,1929-07-17 00:00:00+00:00,,http://www.wikidata.org/entity/Q1512482,,https://de.wikipedia.org/wiki/Rudolf_Wieth%C3%B6lter
Günter Dux,Q1560417,German sociologist,male,Dux,Günter,1933-06-23 00:00:00+00:00,,http://www.wikidata.org/entity/Q1560417,,https://de.wikipedia.org/wiki/G%C3%BCnter_Dux
Jutta Limbach,Q72551,German judge and politician (SPD) (1934-2016),female,Limbach,Jutta,1934-03-27 00:00:00+00:00,2016-09-10 00:00:00+00:00,http://www.wikidata.org/entity/Q72551,https://en.wikipedia.org/wiki/Jutta_Limbach,https://de.wikipedia.org/wiki/Jutta_Limbach
Thomas Raiser,Q27909309,,male,,Thomas,1935-02-20 00:00:00+00:00,,http://www.wikidata.org/entity/Q27909309,,https://de.wikipedia.org/wiki/Thomas_Raiser
Manfred Rehbinder,Q1889820,German jurist,male,,Manfred,1935-03-22 00:00:00+00:00,,http://www.wikidata.org/entity/Q1889820,,https://de.wikipedia.org/wiki/Manfred_Rehbinder
Rüdiger Lautmann,Q91074,German sociologist and LGBT researcher,male,,Rüdiger,1935-12-22 00:00:00+00:00,,http://www.wikidata.org/entity/Q91074,https://en.wikipedia.org/wiki/R%C3%BCdiger_Lautmann,https://de.wikipedia.org/wiki/R%C3%BCdiger_Lautmann
Wolfgang Kaupen,Q93221485,,male,,Wolfgang,1936-01-01 00:00:00+00:00,1981-01-01 00:00:00+00:00,http://www.wikidata.org/entity/Q93221485,,
Volkmar Gessner,Q15435946,University professor,male,Gessner,Volkmar,1937-10-09 00:00:00+00:00,2014-11-08 00:00:00+00:00,http://www.wikidata.org/entity/Q15435946,https://en.wikipedia.org/wiki/Volkmar_Gessner,https://de.wikipedia.org/wiki/Volkmar_Gessner
Klaus F. Röhl,Q27148390,,male,Röhl,Klaus,1938-05-22 00:00:00+00:00,,http://www.wikidata.org/entity/Q27148390,,https://de.wikipedia.org/wiki/Klaus_F._R%C3%B6hl
Erhard Blankenburg,Q51595283,German sociologist of law (1938-2018),male,Blankenburg,Erhard,1938-10-30 00:00:00+00:00,2018-03-28 00:00:00+00:00,http://www.wikidata.org/entity/Q51595283,https://en.wikipedia.org/wiki/Erhard_Blankenburg,https://de.wikipedia.org/wiki/Erhard_Blankenburg
Manfred Weiss,Q1588285,German jurist,male,Weiss,Manfred,1940-06-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q1588285,,https://de.wikipedia.org/wiki/Manfred_Weiss_(Jurist)
Rüdiger Voigt,Q1682026,German author,male,Voigt,Rüdiger,1941-04-07 00:00:00+00:00,,http://www.wikidata.org/entity/Q1682026,,https://de.wikipedia.org/wiki/R%C3%BCdiger_Voigt
Roland Girtler,Q112873,Austrian historian and sociologist,male,Girtler,Roland,1941-05-31 00:00:00+00:00,,http://www.wikidata.org/entity/Q112873,,https://de.wikipedia.org/wiki/Roland_Girtler
Hubert Treiber,Q1633462,German university teacher,male,,Hubert,1942-07-30 00:00:00+00:00,,http://www.wikidata.org/entity/Q1633462,,https://de.wikipedia.org/wiki/Hubert_Treiber
Brun-Otto Bryde,Q107784,German judge,male,,,1943-01-12 00:00:00+00:00,,http://www.wikidata.org/entity/Q107784,https://en.wikipedia.org/wiki/Brun-Otto_Bryde,https://de.wikipedia.org/wiki/Brun-Otto_Bryde
Hubert Rottleuthner,Q55622018,,male,,Hubert,1944-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q55622018,,https://de.wikipedia.org/wiki/Hubert_Rottleuthner
Klaus A. Ziegert,Q112513122,German sociologist of law,male,Ziegert,Klaus,1944-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q112513122,,
Dieter Martiny,Q1222459,German jurist,male,Martiny,Dieter,1944-03-21 00:00:00+00:00,,http://www.wikidata.org/entity/Q1222459,,https://de.wikipedia.org/wiki/Dieter_Martiny
Gunther Teubner,Q98304,German academic,male,Teubner,Gunther,1944-04-30 00:00:00+00:00,,http://www.wikidata.org/entity/Q98304,https://en.wikipedia.org/wiki/Gunther_Teubner,https://de.wikipedia.org/wiki/Gunther_Teubner
Konstanze Plett,Q95192683,,female,,,1947-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q95192683,,https://de.wikipedia.org/wiki/Konstanze_Plett
Armin Höland,Q15435996,German university professor,male,,Armin,1948-11-04 00:00:00+00:00,,http://www.wikidata.org/entity/Q15435996,,https://de.wikipedia.org/wiki/Armin_H%C3%B6land
Susanne Karstedt,Q2369299,criminologist,female,,Susanne,1949-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q2369299,https://en.wikipedia.org/wiki/Susanne_Karstedt,https://de.wikipedia.org/wiki/Susanne_Karstedt
Leo Kißler,Q63203841,,male,,Leo,1949-01-08 00:00:00+00:00,,http://www.wikidata.org/entity/Q63203841,,https://de.wikipedia.org/wiki/Leo_Ki%C3%9Fler
Fritz Jost,Q105946060,,male,,Fritz,1949-08-07 00:00:00+00:00,,http://www.wikidata.org/entity/Q105946060,,https://de.wikipedia.org/wiki/Fritz_Jost_(Rechtswissenschaftler)
Doris Lucke,Q1245242,German university teacher,female,,Doris,1953-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q1245242,,https://de.wikipedia.org/wiki/Doris_Lucke
Ralf Rogowski,Q20128038,Law professor (born 1953),male,Rogowski,Ralf,1953-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q20128038,https://en.wikipedia.org/wiki/Ralf_Rogowski,
Wolfgang Ludwig-Mayerhofer,Q2590472,German sociologist,male,,Wolfgang,1954-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q2590472,,https://de.wikipedia.org/wiki/Wolfgang_Ludwig-Mayerhofer
Kai Bussmann,Q1552696,German jurist,male,Bussmann,Kai,1955-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q1552696,,https://de.wikipedia.org/wiki/Kai_Bussmann
Dorothea Jansen,Q21258453,,female,Jansen,Dorothea,1956-08-21 00:00:00+00:00,2017-05-12 00:00:00+00:00,http://www.wikidata.org/entity/Q21258453,,https://de.wikipedia.org/wiki/Dorothea_Jansen
Alfons Bora,Q2644328,German sociologist,male,,Alfons,1957-05-03 00:00:00+00:00,,http://www.wikidata.org/entity/Q2644328,,https://de.wikipedia.org/wiki/Alfons_Bora
Ute Sacksofsky,Q48562036,German legal scholar,female,,Ute,1960-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q48562036,,https://de.wikipedia.org/wiki/Ute_Sacksofsky
Stefan Machura,Q95245830,,male,,Stefan,1962-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q95245830,,
Ralf Poscher,Q2129347,German legal historian,male,,Ralf,1962-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q2129347,,https://de.wikipedia.org/wiki/Ralf_Poscher
Susanne Baer,Q101872,German judge,female,Baer,Susanne,1964-02-16 00:00:00+00:00,,http://www.wikidata.org/entity/Q101872,https://en.wikipedia.org/wiki/Susanne_Baer,https://de.wikipedia.org/wiki/Susanne_Baer
Gralf-Peter Calliess,Q1542033,German jurist,male,Calliess,Gralf-Peter,1967-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q1542033,,https://de.wikipedia.org/wiki/Gralf-Peter_Calliess
This diff is collapsed.
fullName,qid,itemdesc,sexOrGender,familyName,givenName,dateOfBirth,dateOfDeath,wikidata_url,wikipedia_en,wikipedia_de
William Twining,Q16095913,Professor of Jurisprudence,male,Twining,Lawrence William,1934-09-22 00:00:00+00:00,,http://www.wikidata.org/entity/Q16095913,https://en.wikipedia.org/wiki/William_Twining,
Philip Aneurin Thomas,Q112432625,,male,Thomas,,1940-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q112432625,,
David Sugarman,Q112366094,,male,Sugarman,David,1948-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q112366094,,
Carol Smart,Q5044563,Feminist sociologist,female,Smart,Carol,1948-12-20 00:00:00+00:00,,http://www.wikidata.org/entity/Q5044563,https://en.wikipedia.org/wiki/Carol_Smart,
David Nelken,Q5237957,British political scientist,male,,David,1949-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q5237957,https://en.wikipedia.org/wiki/David_Nelken,
Rosemary Hunter,Q7368381,Australian jurist,female,Hunter,Rosemary,1962-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q7368381,https://en.wikipedia.org/wiki/Rosemary_Hunter,
Sally Wheeler,Q28078278,Professor and Head of the School of Law at Queen's University Belfast,female,Wheeler,Sally,1964-01-01 00:00:00+00:00,,http://www.wikidata.org/entity/Q28078278,https://en.wikipedia.org/wiki/Sally_Wheeler_(legal_scholar),
Don Harris,Q125080407,British jurist and professor at the university of Oxford (1928-2020),,,,,,http://www.wikidata.org/entity/Q125080407,,
Roger Cotterrell,Q7358027,British academic,male,,Roger,,,http://www.wikidata.org/entity/Q7358027,https://en.wikipedia.org/wiki/Roger_Cotterrell,
Fiona Cownie,Q113809561,,female,,,,,http://www.wikidata.org/entity/Q113809561,,
Joanne Conaghan,Q108276256,British legal scholar,female,Conaghan,Joanne,,,http://www.wikidata.org/entity/Q108276256,https://en.wikipedia.org/wiki/Joanne_Conaghan,
| fullName | itemdesc | wikidata_url | wikipedia_en |
|:------------------------------|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------|
| William Twining (1934-) | Professor of Jurisprudence | <a target="_blank" href="http://www.wikidata.org/entity/Q16095913">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/William_Twining">WP (EN)</a> |
| Philip Aneurin Thomas (1940-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q112432625">Wikidata</a> | |
| David Sugarman (1948-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q112366094">Wikidata</a> | |
| Carol Smart (1948-) | Feminist sociologist | <a target="_blank" href="http://www.wikidata.org/entity/Q5044563">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Carol_Smart">WP (EN)</a> |
| David Nelken (1949-) | British political scientist | <a target="_blank" href="http://www.wikidata.org/entity/Q5237957">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/David_Nelken">WP (EN)</a> |
| Rosemary Hunter (1962-) | Australian jurist | <a target="_blank" href="http://www.wikidata.org/entity/Q7368381">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Rosemary_Hunter">WP (EN)</a> |
| Sally Wheeler (1964-) | Professor and Head of the School of Law at Queen's University Belfast | <a target="_blank" href="http://www.wikidata.org/entity/Q28078278">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Sally_Wheeler_(legal_scholar)">WP (EN)</a> |
| Don Harris (-) | British jurist and professor at the university of Oxford (1928-2020) | <a target="_blank" href="http://www.wikidata.org/entity/Q125080407">Wikidata</a> | |
| Roger Cotterrell (-) | British academic | <a target="_blank" href="http://www.wikidata.org/entity/Q7358027">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Roger_Cotterrell">WP (EN)</a> |
| Fiona Cownie (-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q113809561">Wikidata</a> | |
| Joanne Conaghan (-) | British legal scholar | <a target="_blank" href="http://www.wikidata.org/entity/Q108276256">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Joanne_Conaghan">WP (EN)</a> |
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
%% Cell type:code id:initial_id tags:
``` python
import pandas as pd
from lib.wikidata import get_person_info_from_wikidata

# Wikidata properties to retrieve for every scholar (column name -> property ID)
property_labels_to_ids = dict(
    sexOrGender='P21',
    familyName='P734',
    givenName='P735',
    dateOfBirth='P569',
    dateOfDeath='P570',
)

# One "Name (QID)" entry per line
scholars = """
William Twining (Q16095913)
Philip Aneurin Thomas (Q112432625)
David Sugarman (Q112366094)
Carol Smart (Q5044563)
David Nelken (Q5237957)
Rosemary Hunter (Q7368381)
Sally Wheeler (Q28078278)
Don Harris (Q125080407)
Roger Cotterrell (Q7358027)
Fiona Cownie (Q113809561)
Joanne Conaghan (Q108276256)
""".split("\n")

df = get_person_info_from_wikidata(scholars, property_labels_to_ids, include_description=True, debug=False)

# Parse the date columns so the table can be ordered chronologically by birth date
df['dateOfBirth'] = pd.to_datetime(df['dateOfBirth'])
df['dateOfDeath'] = pd.to_datetime(df['dateOfDeath'])
df = df.sort_values(by=["dateOfBirth"], ignore_index=True)

df.to_csv("output/scholars-uk.csv", index=False)
df
```
%% Output
Retrieving scholar data...
Retrieving wikipedia URLs...
fullName qid \
0 William Twining Q16095913
1 Philip Aneurin Thomas Q112432625
2 David Sugarman Q112366094
3 Carol Smart Q5044563
4 David Nelken Q5237957
5 Rosemary Hunter Q7368381
6 Sally Wheeler Q28078278
7 Don Harris Q125080407
8 Roger Cotterrell Q7358027
9 Fiona Cownie Q113809561
10 Joanne Conaghan Q108276256
itemdesc sexOrGender familyName \
0 Professor of Jurisprudence male Twining
1 None male Thomas
2 None male Sugarman
3 Feminist sociologist female Smart
4 British political scientist male
5 Australian jurist female Hunter
6 Professor and Head of the School of Law at Que... female Wheeler
7 British jurist and professor at the university... None
8 British academic male
9 None female
10 British legal scholar female Conaghan
givenName dateOfBirth dateOfDeath \
0 Lawrence William 1934-09-22 00:00:00+00:00 NaT
1 1940-01-01 00:00:00+00:00 NaT
2 David 1948-01-01 00:00:00+00:00 NaT
3 Carol 1948-12-20 00:00:00+00:00 NaT
4 David 1949-01-01 00:00:00+00:00 NaT
5 Rosemary 1962-01-01 00:00:00+00:00 NaT
6 Sally 1964-01-01 00:00:00+00:00 NaT
7 NaT NaT
8 Roger NaT NaT
9 NaT NaT
10 Joanne NaT NaT
wikidata_url \
0 http://www.wikidata.org/entity/Q16095913
1 http://www.wikidata.org/entity/Q112432625
2 http://www.wikidata.org/entity/Q112366094
3 http://www.wikidata.org/entity/Q5044563
4 http://www.wikidata.org/entity/Q5237957
5 http://www.wikidata.org/entity/Q7368381
6 http://www.wikidata.org/entity/Q28078278
7 http://www.wikidata.org/entity/Q125080407
8 http://www.wikidata.org/entity/Q7358027
9 http://www.wikidata.org/entity/Q113809561
10 http://www.wikidata.org/entity/Q108276256
wikipedia_en wikipedia_de
0 https://en.wikipedia.org/wiki/William_Twining None
1 None None
2 None None
3 https://en.wikipedia.org/wiki/Carol_Smart None
4 https://en.wikipedia.org/wiki/David_Nelken None
5 https://en.wikipedia.org/wiki/Rosemary_Hunter None
6 https://en.wikipedia.org/wiki/Sally_Wheeler_(l... None
7 None None
8 https://en.wikipedia.org/wiki/Roger_Cotterrell None
9 None None
10 https://en.wikipedia.org/wiki/Joanne_Conaghan None
%% Cell type:code id:293eecdcf97d4fd1 tags:
``` python
# Print every scholar as "Name (QID)", one per line, in the dataframe's current row order.
print("\n".join(df.apply(lambda r: f"{r['fullName']} ({r['qid']})", axis=1).to_list()))
```
%% Output
William Twining (Q16095913)
Philip Aneurin Thomas (Q112432625)
David Sugarman (Q112366094)
Carol Smart (Q5044563)
David Nelken (Q5237957)
Rosemary Hunter (Q7368381)
Sally Wheeler (Q28078278)
Don Harris (Q125080407)
Roger Cotterrell (Q7358027)
Fiona Cownie (Q113809561)
Joanne Conaghan (Q108276256)
%% Cell type:code id:e6e8f4591eff1e62 tags:
``` python
from pathlib import Path
from IPython.display import display, Markdown
from lib.wikidata import create_styled_table

# Columns to show in the rendered table, in display order
include_rows = ['fullName', 'itemdesc', 'wikidata_url', 'wikipedia_en']
md = create_styled_table(df, include_rows).to_markdown(index=False)
# Write as UTF-8 explicitly: the platform default encoding may not be able to
# represent every character and would make the output file machine-dependent.
Path('output/scholars-uk.md').write_text(md, encoding='utf-8')
display(Markdown(md))
```
%% Output
| fullName | itemdesc | wikidata_url | wikipedia_en |
|:------------------------------|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------|
| William Twining (1934-) | Professor of Jurisprudence | <a target="_blank" href="http://www.wikidata.org/entity/Q16095913">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/William_Twining">WP (EN)</a> |
| Philip Aneurin Thomas (1940-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q112432625">Wikidata</a> | |
| David Sugarman (1948-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q112366094">Wikidata</a> | |
| Carol Smart (1948-) | Feminist sociologist | <a target="_blank" href="http://www.wikidata.org/entity/Q5044563">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Carol_Smart">WP (EN)</a> |
| David Nelken (1949-) | British political scientist | <a target="_blank" href="http://www.wikidata.org/entity/Q5237957">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/David_Nelken">WP (EN)</a> |
| Rosemary Hunter (1962-) | Australian jurist | <a target="_blank" href="http://www.wikidata.org/entity/Q7368381">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Rosemary_Hunter">WP (EN)</a> |
| Sally Wheeler (1964-) | Professor and Head of the School of Law at Queen's University Belfast | <a target="_blank" href="http://www.wikidata.org/entity/Q28078278">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Sally_Wheeler_(legal_scholar)">WP (EN)</a> |
| Don Harris (-) | British jurist and professor at the university of Oxford (1928-2020) | <a target="_blank" href="http://www.wikidata.org/entity/Q125080407">Wikidata</a> | |
| Roger Cotterrell (-) | British academic | <a target="_blank" href="http://www.wikidata.org/entity/Q7358027">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Roger_Cotterrell">WP (EN)</a> |
| Fiona Cownie (-) | | <a target="_blank" href="http://www.wikidata.org/entity/Q113809561">Wikidata</a> | |
| Joanne Conaghan (-) | British legal scholar | <a target="_blank" href="http://www.wikidata.org/entity/Q108276256">Wikidata</a> | <a target="_blank" href="https://en.wikipedia.org/wiki/Joanne_Conaghan">WP (EN)</a> |
%% Cell type:code id:e49091071f6d1e93 tags:
``` python
```
%% Cell type:code id:a84ea8429f57d924 tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment