Skip to content
Snippets Groups Projects
Commit 2ef626ae authored by Christian Boulanger's avatar Christian Boulanger
Browse files

Add GPT-4 optimized prompt, refactoring

parent 53a4915d
No related branches found
No related tags found
No related merge requests found
Showing with 208 additions and 204 deletions
timeline_data.xlsx
data/timeline_data.xlsx
user-config.py
.env
apicache
/throttle.ctrl
data/*-chatgpt.csv
data/*-wikipedia.txt
\ No newline at end of file
input/*-chatgpt.csv
input/*-wikipedia.txt
\ No newline at end of file
File moved
File moved
%% Cell type:markdown id:9d6a10996bfdd3cf tags:
# Download wikipedia pages as source of triple extraction
This improves on the [data-extraction notebook](./data-extraction.ipynb) by downloading the Wikipedia article from which the information is to be extracted.
%% Cell type:markdown id:2ad235b62e2efc09 tags:
## 1. Download raw Wikipedia page content for the list of scholars and save it
%% Cell type:code id:initial_id tags:
``` python
import os.path
from lib.wikidata import get_wikipedia_page_data
from urllib.parse import unquote
import pandas as pd
# Download the raw Wikipedia text for every scholar listed in scholars.csv and
# cache it as a text file, one file per scholar.
df = pd.read_csv('scholars.csv')
for _, row in df.iterrows():
    fullName = row['fullName']
    # Prefer the German article; fall back to the English one.
    if pd.notna(row['wikipedia_de']):
        page_title = unquote(os.path.basename(row['wikipedia_de']))
        language_code = 'de'
    elif pd.notna(row['wikipedia_en']):
        page_title = unquote(os.path.basename(row['wikipedia_en']))
        language_code = 'en'
    else:
        print(f'No Wikipedia page exists for {fullName}.')
        continue
    wikipedia_content_cache_path = f'input/{fullName}-wikipedia.txt'
    # A scholar whose text is already cached is skipped, so an existing file
    # is never downloaded again or overwritten.
    if not os.path.isfile(wikipedia_content_cache_path):
        page_data = get_wikipedia_page_data(page_title, language_code)
        # NOTE(review): `exists` is read as an attribute here; if the page
        # object exposes it as a method, this check is always truthy - confirm
        # against get_wikipedia_page_data.
        if page_data and page_data['page'].exists:
            # The first line of the cache file is the source URL, followed by
            # a blank line and the page content.
            file_content = f"{page_data['url']}\n\n{page_data['content']}"
            with open(wikipedia_content_cache_path, 'w', encoding='utf-8') as file:
                file.write(file_content)
        else:
            print(f'No page content could be retrieved for "{fullName}"')
```
%% Output
No Wikipedia page exists for Wolfgang Kaupen.
%% Cell type:markdown id:e83e59e1974a6506 tags:
%% Cell type:markdown id:997c82c5d3d72b7 tags:
## 2. Reduce text size
To remove unnecessary information and reduce the token count, edit the downloaded files so that they contain only the biographical parts from which the information is to be extracted.
%% Cell type:markdown id:303ddc348c4a2887 tags:
## 3. Extract the information
%% Cell type:code id:d904e502f8eff15d tags:
``` python
from lib.langchain import extract_to_csv
from langchain_openai import ChatOpenAI
from pathlib import Path
import time
# Scholar whose cached Wikipedia text is processed, and the QID passed to the prompt.
fullName = "Thilo Ramm"
qid = "Q59533838"
model = ChatOpenAI(model_name="gpt-4")

# The cache file is written as UTF-8 by the download step, so read it back as
# UTF-8 explicitly instead of relying on the platform's default encoding.
website_text = Path(f'input/{fullName}-wikipedia.txt').read_text(encoding='utf-8')

# Run the same extraction once per prompt template and report, for each, how
# long it took and how many rows it produced.
for template_file in ['handmade-prompt-template.txt', 'gp4-optimized-prompt-template.txt']:
    start_time = time.time()
    template = Path(f'prompts/{template_file}').read_text()
    df = extract_to_csv(model, template, debug=False, fullName=fullName, qid=qid, website_text=website_text)
    end_time = time.time()
    execution_time = end_time - start_time  # In seconds
    minutes, seconds = divmod(execution_time, 60)
    print(f"Prompting with {template_file} took {int(minutes)} minutes and {int(seconds)} seconds and extracted {len(df)} triples.")
    # The output file is named after the part of the template file name before the first "-".
    csv_path = f'output/{fullName}-{template_file.split("-")[0]}-prompt.csv'
    df.to_csv(csv_path, index=False)
```
%% Output
C:\Users\boulanger\AppData\Local\miniconda3\Lib\site-packages\langchain_openai\chat_models\base.py:454: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
response = response.dict()
C:\Users\boulanger\AppData\Local\miniconda3\Lib\site-packages\pydantic\main.py:979: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
warnings.warn('The `dict` method is deprecated; use `model_dump` instead.', DeprecationWarning)
Prompting with handmade-prompt-template.txt took 0 minutes and 28 seconds and extracted 13 triples.
Prompting with gp4-optimized-prompt-template.txt took 0 minutes and 19 seconds and extracted 6 triples.
%% Cell type:code id:7e04f078c326387a tags:
%% Cell type:code id:2c11affcd6ffbff4 tags:
``` python
from lib.wikidata import update_wikidata_from_csv
```
......
......@@ -6,11 +6,12 @@ from dotenv import load_dotenv
load_dotenv()
def extract_to_csv(model, template, debug=False, **params):
    """Prompt the model with a filled-in template and parse its answer as CSV.

    Args:
        model: Chat model that is piped between the prompt and the output parser.
        template: Prompt template text; its placeholders are filled from ``params``.
        debug: If true, print the raw model response before parsing it.
        **params: Values for the template placeholders, passed to the chain as-is.

    Returns:
        A pandas DataFrame parsed from the response text. The ``start_time``
        and ``end_time`` columns are read as strings.

    Nothing is written to disk; saving the result is left to the caller.
    """
    prompt = ChatPromptTemplate.from_template(template)
    parser = StrOutputParser()
    chain = prompt | model | parser
    response = chain.invoke(params)
    if debug:
        print(response)
    # The response is expected to be CSV text, so wrap it in a file-like
    # object for pandas.
    data = io.StringIO(response)
    return pd.read_csv(data, dtype={'start_time': str, 'end_time': str})
*
\ No newline at end of file
Thilo Ramm,Q59533838,educated at,P69,University of Marburg,Unnamed: 5,Unnamed: 6,1949,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,student of,P1066,Fritz von Hippel,,1949.0,1949.0,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,student of,P1066,Fritz von Hippel,,1951.0,1953.0,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,academic appointment,P8413,University of Freiburg,,1953.0,1961.0,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,academic appointment,P8413,University of Giessen,,1962.0,1977.0,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,founded by,P112,University of Giessen,,,1962.0,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
Thilo Ramm,Q59533838,employer,P108,Fernuniversität Hagen,,1977.0,,https://www.fernuni-hagen.de/universitaet/aktuelles/2018/07/am16-ramm_thilo_nachruf.shtml
subject-label,subject-qid,predicate,pid,object,object-qid,start_time,end_time,reference_url
Thilo Ramm,Q59533838,educated at,P69,University of Marburg,,,1949,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,educated at,P69,University of Marburg,,1949,1949,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,student of,P1066,Fritz von Hippel,,1949,1949,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,educated at,P69,University of Freiburg,,1953,1953,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,University of Freiburg,,,1961,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic degree,P512,doctorate,,1949,1949,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
doctorate,,conferred by,P1027,University of Marburg,,1949,1949,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,University of Freiburg,,1953,1961,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Socialism,,1953,1953,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,University of Giessen,,1962,1977,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Civil law,,1962,1962,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Labour law,,1962,1962,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Social law,,1962,1962,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Social philosophy,,1962,1962,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
University of Giessen,,founded by,P112,Thilo Ramm,Q59533838,1962,1962,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,University of Giessen,,1962,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,Fernuniversität in Hagen,,1977,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Civil law,,1962,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Labor law,,1962,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Social law,,1962,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,field of work,P101,Social philosophy,,1962,,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
Thilo Ramm,Q59533838,academic appointment,P8413,FernUniversity in Hagen,,1977,1982,de.wikipedia.org/wiki/Thilo_Ramm?oldid=237476391
**Objective:** Extract structured data from text for Wikidata entry.
**Text Source:** The text is about "{fullName}" with the QID {qid}. It contains sections separated by "-----", each starting with a URL followed by content excerpts.
**Output Format:** Arrange data in a CSV table with columns:
- `subject-label`
- `subject-qid`
- `predicate`
- `pid`
- `object`
- `object-qid`
- `start_time`
- `end_time`
- `reference_url`
**Rules:**
- **subject-label/subject-qid:** Generally, "{fullName}" and {qid}. For specific entities or reverse relationships (e.g., P112), adjust accordingly.
- **predicate/pid:** Use specific predicates like P69, P1066, etc., as guided.
- **object-label/object-qid:** English labels and, if available, QIDs for related institutions/persons.
- **start_time/end_time:** Dates for the duration of the statement's validity. If not specified, leave blank.
- **reference_url:** Source URL of the extracted information.
**Data Extraction Guidance:**
- **Educated at (P69):** Institutions where the person studied.
- **Student of (P1066):** Supervisors for doctoral theses/habilitations.
- **Employer (P108):** Organization paying the salary.
- **Academic appointment (P8413):** Department of a university, or use P108 if unknown.
- **Student (P802):** Persons educated by the subject.
- **Member of (P463):** Organizations/associations the person belongs to.
- **Affiliation (P1416):** Organization affiliated with (not P463/P108).
- **Academic degree (P512):** Academic degrees obtained, refine with "conferred by" (P1027) and "point in time" (P585).
- **Field of work (P101):** Main topics/themes of work.
- **Editor (P98):** Editorial board memberships.
- **Founded by (P112):** Journals/organizations formally founded by the subject (reverse subject and object).
- **Significant person (P3342)/Object has role (P3831):** Reverse subject and object for P3342. Use object as subject and summarize the subject's role for P3831.
**Format:**
- CSV with quoted values. Escape quotes within values properly.
**Example Output:**
```
"subject-label","subject-qid","predicate","pid","object","object-qid","start_time","end_time","reference_url"
"{fullName}","{qid}","educated at","P69","<some institution>","<qid if known>","","","<some url>"
```
**Note:** Extract data strictly as presented. Do not infer or add information not explicitly mentioned in the source material.
-----
{website_text}
\ No newline at end of file
**Objective:** Extract structured data from text for Wikidata entry.
**Text Source:** The text is about "Erhard Blankenburg" with the QID Q51595283. It contains sections separated by "-----", each starting with a URL followed by content excerpts.
**Output Format:** Arrange data in a CSV table with columns:
- `subject-label`
- `subject-qid`
- `predicate`
- `pid`
- `object`
- `object-qid`
- `start_time`
- `end_time`
- `reference_url`
**Rules:**
- **subject-label/subject-qid:** Generally, "{fullName}" and {qid}. For specific entities or reverse relationships (e.g., P112), adjust accordingly.
- **predicate/pid:** Use specific predicates like P69, P1066, etc., as guided.
- **object-label/object-qid:** English labels and, if available, QIDs for related institutions/persons.
- **start_time/end_time:** Dates for the duration of the statement's validity. If not specified, leave blank.
- **reference_url:** Source URL of the extracted information.
**Data Extraction Guidance:**
- **Educated at (P69):** Institutions where the person studied.
- **Student of (P1066):** Supervisors for doctoral theses/habilitations.
- **Employer (P108):** Organization paying the salary.
- **Academic appointment (P8413):** Department of a university, or use P108 if unknown.
- **Student (P802):** Persons educated by the subject.
- **Member of (P463):** Organizations/associations the person belongs to.
- **Affiliation (P1416):** Organization affiliated with (not P463/P108).
- **Academic degree (P512):** Academic degrees obtained, refine with "conferred by" (P1027) and "point in time" (P585).
- **Field of work (P101):** Main topics/themes of work.
- **Editor (P98):** Editorial board memberships.
- **Founded by (P112):** Journals/organizations formally founded by the subject (reverse subject and object).
- **Significant person (P3342)/Object has role (P3831):** Reverse subject and object for P3342. Use object as subject and summarize the subject's role for P3831.
**Format:**
- CSV with quoted values. Escape quotes within values properly.
**Example Output:**
```
"subject-label","subject-qid","predicate","pid","object","object-qid","start_time","end_time","reference_url"
"Erhard Blankenburg","Q51595283","educated at","P69","Freie Universität Berlin","Q123456","","","de.wikipedia.org/wiki/Erhard_Blankenburg?oldid=228627122"
```
**Note:** Extract data strictly as presented. Do not infer or add information not explicitly mentioned in the source material.
-----
de.wikipedia.org/wiki/Erhard_Blankenburg?oldid=228627122
== Werdegang ==
Blankenburg belegte ein Studium der Philosophie, Soziologie und Germanistik an der Universität Freiburg und FU Berlin. Es folgten Graduate Studies und eine Tätigkeit als Forschungsassistent am Department of Sociology der University of Oregon. Ein Studium der Soziologie und Wirtschaftswissenschaft an der Universität Basel beendete er mit dem Abschluss Master of Arts 1965.
Seine [[Promotion (Doktor)|Promotion]] zum Dr. phil. erfolgte an der Universität Basel 1966.
Als Assistent am Institut für Soziologie der Universität Freiburg arbeitete er von 1966 bis 1968.
Von 1969 bis 1971 war er Organisationsberater beim Quickborner Team, Hamburg. Danach arbeitete Blankenburg in Basel als Senior Projektleiter bei der [[Prognos]] in Basel. 1973/1974 war er [[wissenschaftlicher Mitarbeiter]] am [[Max-Planck-Institut für ausländisches und internationales Strafrecht]] in Freiburg. Die [[Habilitation]] für das Fach Soziologie erwarb er 1974 an der Universität Freiburg. Blankenburg war von 1975 bis 1980 Mitglied des [[Wissenschaftszentrum Berlin für Sozialforschung|Wissenschaftszentrums Berlin]], Internationales Institut für Management und Verwaltung.
1980 bekam er einen Ruf auf den Lehrstuhl für Rechtssoziologie der [[Vrije Universiteit Amsterdam]]. Gemeinsam mit [[Wolfgang Kaupen]] spielte er eine wichtige Rolle bei der Neubegründung der Deutschen Rechtssoziologie in den 70er-Jahren (Raiser 1998), ebenso, mit [[Volkmar Gessner]], bei der Gründung des [[International Institute for the Sociology of Law]]. Er gehörte auch zu den Initiatoren und zu den Gründungsherausgebern der [[Zeitschrift für Rechtssoziologie]]. Gemeinsam mit [[Bill Felstiner]] organisierte er 1991 in Amsterdam das erste gemeinsame Treffen der beiden bedeutenden Vereinigungen der Rechtssoziologie (LSA und RCSL). Seine Beschäftigung mit rechtssoziologischen Themen war ungewöhnlich breit, reichte von der Soziologie der Kriminalität über die des Staatsapparates bis zu der des Zivilrechts. Blankenburg war primär Empiriker und Methodiker (vgl. seine Empirische Rechtssoziologie). Seine wichtigsten Beiträge zur rechtssoziologischen Theorie betreffen die Begriffe der "Mobilisierung des Rechts" und der "[[Rechtskultur]](en)". Vor allem aber wirkte er als Koordinator, Organisator und als Vermittler zwischen Wissenschaft und Praxis: "Er bemühte sich nicht, eine 'Schule' zu gründen, ihm fiel es leicht, in stets wechselnden Teams mit wechselnden Wissenschaftlern zusammenzuarbeiten. Wie kein anderer Rechtssoziologe vermochte er, erfolgreich Tagungen zu organisieren, kompetente Referenten zu gewinnen und die Veranstaltungen mit Autorität und zugleich locker zu leiten" ([[Theo Rasehorn]] 1998, 23).
\ No newline at end of file
......@@ -17,7 +17,8 @@ Insert data into the columns as per the following rules:
- academic degree (P512): some instance of academic degree (Q189533). After making this claim, add further triples to refine the P512 statement with triples on "conferred by" (P1027) and on "point in time" (P585).
- field of work (P101): extract the main topics and themes the subject has worked and published on
- editor (P98): add information on memberships in editorial boards of academic journals
- founded by (P112): add information on journals, associations or other organizations that the subject helped to establish. When adding this claim, YOU MUST switch subject and object to express the reverse relationship
- founded by (P112): add information on journals, associations or other organizations of which the subject officially was a (co-) founder. When adding this claim, YOU MUST switch subject and object to express the reverse relationship.
- "significant person" (P3342) and "object has role" (P3831): If the subject was an important factor in the establishment of some object without being an official founder. As with P112, reverse subject and object for P3342. For P3831, use the object as subject of the claim and summarize the subject's role as the value.
- object-label/object-qid: here the English labels and, if known, the QIDs for the institutions and persons who are the objects of the triple. If you are not absolutely sure, leave blank
- start_time: the date/year from which the triple statement is true. Leave blank if the date is not specified or cannot be inferred, or the triple involves P585
- end_time: the date/year up to which the triple statement is true. If it is an event, identical to start_time
......
Your task is to extract data from the text and to output it in a format that is suitable as a data source for adding triples to Wikidata.
The text is about "Erhard Blankenburg" with the QID Q51595283. It consists of one or more sections separated by "-----". The sections begin with a standalone URL followed by an excerpt of the content that can be found at this URL.
Arrange the extracted information into a table with the following columns: subject-label, subject-qid, predicate, pid, object, object-qid, start_time, end_time, reference_url.
Insert data into the columns as per the following rules:
- subject-label/subject-qid: In general, the subject is "{fullName}" with the QID {qid}. However, refining/qualifying statements can also be made about other entities, as with the academic degree (P512) item below. Also, in the case of P112, subject and object must be reversed
- predicate/pid:
- educated at (P69): Institutions at which the person studied
- student of (P1066): If supervisors of doctoral theses and habilitations are specified
- employer (P108): is the organization that pays the salary of a person (this can be a company, an institution or the university)
- academic appointment (P8413): usually the department of a university, if this or its QID are not known, like P108
- student (P802): persons contained in WikiData who were educated by the subject
- member of (P463): Organizations and associations to which the person belongs (excluding P108)
- affiliation (P1416): Organization that the subject is affiliated with (not member of or employed by)
- academic degree (P512): some instance of academic degree (Q189533). After making this claim, add further triples to refine the P512 statement with triples on "conferred by" (P1027) and on "point in time" (P585).
- field of work (P101): extract the main topics and themes the subject has worked and published on
- editor (P98): add information on memberships in editorial boards of academic journals
- founded by (P112): add information on journals, associations or other organizations of which the subject officially was a (co-) founder. When adding this claim, YOU MUST switch subject and object to express the reverse relationship.
- "significant person" (P3342) and "object has role" (P3831): If the subject was an important factor in the establishment of some object without being an official founder. As with P112, reverse subject and object for P3342. For P3831, use the object as subject of the claim and summarize the subject's role as the value.
- object-label/object-qid: here the English labels and, if known, the QIDs for the institutions and persons who are the objects of the triple. If you are not absolutely sure, leave blank
- start_time: the date/year from which the triple statement is true. Leave blank if the date is not specified or cannot be inferred, or the triple involves P585
- end_time: the date/year up to which the triple statement is true. If it is an event, identical to start_time
- reference_url: this is the source URL of the text from which the information was extracted.
Return the information as comma-separated values (CSV). Include the column headers. Surround the values with quotes. If values contain quotes, properly escape them.
DO NOT, UNDER ANY CIRCUMSTANCES, provide any commentary or explanations, just return the raw data. Do not make anything up that is not in the source material.
-----
de.wikipedia.org/wiki/Erhard_Blankenburg?oldid=228627122
== Werdegang ==
Blankenburg belegte ein Studium der Philosophie, Soziologie und Germanistik an der Universität Freiburg und FU Berlin. Es folgten Graduate Studies und eine Tätigkeit als Forschungsassistent am Department of Sociology der University of Oregon. Ein Studium der Soziologie und Wirtschaftswissenschaft an der Universität Basel beendete er mit dem Abschluss Master of Arts 1965.
Seine [[Promotion (Doktor)|Promotion]] zum Dr. phil. erfolgte an der Universität Basel 1966.
Als Assistent am Institut für Soziologie der Universität Freiburg arbeitete er von 1966 bis 1968.
Von 1969 bis 1971 war er Organisationsberater beim Quickborner Team, Hamburg. Danach arbeitete Blankenburg in Basel als Senior Projektleiter bei der [[Prognos]] in Basel. 1973/1974 war er [[wissenschaftlicher Mitarbeiter]] am [[Max-Planck-Institut für ausländisches und internationales Strafrecht]] in Freiburg. Die [[Habilitation]] für das Fach Soziologie erwarb er 1974 an der Universität Freiburg. Blankenburg war von 1975 bis 1980 Mitglied des [[Wissenschaftszentrum Berlin für Sozialforschung|Wissenschaftszentrums Berlin]], Internationales Institut für Management und Verwaltung.
1980 bekam er einen Ruf auf den Lehrstuhl für Rechtssoziologie der [[Vrije Universiteit Amsterdam]]. Gemeinsam mit [[Wolfgang Kaupen]] spielte er eine wichtige Rolle bei der Neubegründung der Deutschen Rechtssoziologie in den 70er-Jahren (Raiser 1998), ebenso, mit [[Volkmar Gessner]], bei der Gründung des [[International Institute for the Sociology of Law]]. Er gehörte auch zu den Initiatoren und zu den Gründungsherausgebern der [[Zeitschrift für Rechtssoziologie]]. Gemeinsam mit [[Bill Felstiner]] organisierte er 1991 in Amsterdam das erste gemeinsame Treffen der beiden bedeutenden Vereinigungen der Rechtssoziologie (LSA und RCSL). Seine Beschäftigung mit rechtssoziologischen Themen war ungewöhnlich breit, reichte von der Soziologie der Kriminalität über die des Staatsapparates bis zu der des Zivilrechts. Blankenburg war primär Empiriker und Methodiker (vgl. seine Empirische Rechtssoziologie). Seine wichtigsten Beiträge zur rechtssoziologischen Theorie betreffen die Begriffe der "Mobilisierung des Rechts" und der "[[Rechtskultur]](en)". Vor allem aber wirkte er als Koordinator, Organisator und als Vermittler zwischen Wissenschaft und Praxis: "Er bemühte sich nicht, eine 'Schule' zu gründen, ihm fiel es leicht, in stets wechselnden Teams mit wechselnden Wissenschaftlern zusammenzuarbeiten. Wie kein anderer Rechtssoziologe vermochte er, erfolgreich Tagungen zu organisieren, kompetente Referenten zu gewinnen und die Veranstaltungen mit Autorität und zugleich locker zu leiten" ([[Theo Rasehorn]] 1998, 23).
\ No newline at end of file
%% Cell type:markdown id:8ab9671b6edfa9f tags:
# Update WikiData
%% Cell type:code id:59d15dc93174e6ad tags:
``` python
from datetime import datetime, timezone

import pywikibot
from pywikibot import Claim, WbTime
# Connect to Wikidata and obtain its data repository; the ItemPage and Claim
# objects created in this cell are all bound to `repo`.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
# Q51595283 is the item this cell updates (Erhard Blankenburg, per the
# reference URLs used further down in this cell).
item = pywikibot.ItemPage(repo, 'Q51595283')
# Fetch the item up front; its existing claims are read from `item.claims` below.
item.get()
print("Retrieved wikidata item.")
# Function to check if a qualifier exists
def qualifier_exists(claim, qualifier_property, target_value):
    """Tell whether `claim` already has `qualifier_property` set to `target_value`.

    Looks only at the qualifiers stored under `qualifier_property` and compares
    each one's target with `target_value`. Prints a notice and returns True on
    the first match; returns False if there is none.
    """
    candidates = claim.qualifiers.get(qualifier_property, [])
    # any() stops at the first matching qualifier.
    found = any(candidate.getTarget() == target_value for candidate in candidates)
    if found:
        print(f'Claim {claim.getID()} {qualifier_property} {target_value} already exists.')
    return found
# Function to check if a reference exists
def reference_exists(claim, source_property, target_url):
    """Tell whether `claim` already cites `target_url` via `source_property`.

    Every entry of `claim.sources` is treated as a mapping from property id to
    a list of values. Prints a notice and returns True as soon as one value
    stored under `source_property` has `target_url` as its target; returns
    False otherwise, including when the claim has no sources at all.
    """
    for source in claim.sources:
        # Look the property up directly instead of scanning every key of the mapping.
        for value in source.get(source_property, []):
            if value.getTarget() == target_url:
                print(f'Reference {source_property} {target_url} already exists for {claim.getID()}.')
                return True
    return False
# Ensure employment claim is not duplicated
employment_claim_exists = False
for claim in item.claims.get('P108', []):  # P108 is 'employer'
    # A claim without a concrete value has no target, so guard before
    # asking the target for its id.
    existing_target = claim.getTarget()
    # NOTE(review): confirm that Q1065414 is the intended employer item.
    if existing_target is not None and existing_target.getID() == 'Q1065414':  # University of Amsterdam
        # `claim` stays bound to the matching claim; qualifiers and references
        # below are attached to it.
        employment_claim_exists = True
        break

if not employment_claim_exists:
    claim = Claim(repo, 'P108')
    target = pywikibot.ItemPage(repo, 'Q1065414')
    claim.setTarget(target)
    item.addClaim(claim)
    print(f'Created new claim {claim}...')

# Add start and end time qualifiers if they don't already exist
start_time = WbTime(year=1980)
if not qualifier_exists(claim, 'P580', start_time):
    start_qualifier = Claim(repo, 'P580')
    start_qualifier.setTarget(start_time)
    claim.addQualifier(start_qualifier)
    print(f'Added new qualifier {start_qualifier}...')

end_time = WbTime(year=2003)
if not qualifier_exists(claim, 'P582', end_time):
    end_qualifier = Claim(repo, 'P582')
    end_qualifier.setTarget(end_time)
    claim.addQualifier(end_qualifier)
    print(f'Added new qualifier {end_qualifier}...')

# Add references, each together with a 'retrieved' (P813) date.
# Use a timezone-aware "now" in UTC; only the calendar date is stored.
current_datetime = datetime.now(timezone.utc)
retrieved_at_datetime = WbTime(year=current_datetime.year, month=current_datetime.month, day=current_datetime.day)
wikipedia_url = 'https://de.wikipedia.org/wiki/Erhard_Blankenburg'
linkedin_url = 'https://www.linkedin.com/in/erhard-blankenburg-63938058/'

if not reference_exists(claim, 'P4656', wikipedia_url):
    # Add Wikipedia reference
    wikipedia_reference = Claim(repo, 'P4656')
    wikipedia_reference.setTarget(wikipedia_url)
    retrieved_at_claim_wiki = Claim(repo, 'P813')
    retrieved_at_claim_wiki.setTarget(retrieved_at_datetime)
    # The retrieval date has to be part of the same reference as the URL;
    # a qualifier attached to a reference claim is not saved with it.
    claim.addSources([wikipedia_reference, retrieved_at_claim_wiki])
    print(f'Added new source {wikipedia_reference}...')

if not reference_exists(claim, 'P854', linkedin_url):
    # Add LinkedIn reference
    linkedin_reference = Claim(repo, 'P854')
    linkedin_reference.setTarget(linkedin_url)
    retrieved_at_claim_linkedin = Claim(repo, 'P813')
    retrieved_at_claim_linkedin.setTarget(retrieved_at_datetime)
    claim.addSources([linkedin_reference, retrieved_at_claim_linkedin])
    print(f'Added new source {linkedin_reference}...')

print('Modifications applied in an idempotent manner.')
```
%% Output
Retrieved wikidata item.
Claim P108 P580 {
"after": 0,
"before": 0,
"calendarmodel": "http://www.wikidata.org/entity/Q1985727",
"precision": 9,
"time": "+00000001980-01-01T00:00:00Z",
"timezone": 0
} already exists.
Claim P108 P582 {
"after": 0,
"before": 0,
"calendarmodel": "http://www.wikidata.org/entity/Q1985727",
"precision": 9,
"time": "+00000002003-01-01T00:00:00Z",
"timezone": 0
} already exists.
Reference P4656 https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists for P108.
Reference P854 https://www.linkedin.com/in/erhard-blankenburg-63938058/ already exists for P108.
Modifications applied in an idempotent manner.
%% Cell type:code id:d702eb98f46957ca tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment