diff --git a/wikidata/data-extraction.ipynb b/wikidata/data-extraction.ipynb index ffb9283d7f0b50cc8d3a2fae76344aa9bb87cd6b..839808199fca862bcc33a3af7efa87e775a1a337 100644 --- a/wikidata/data-extraction.ipynb +++ b/wikidata/data-extraction.ipynb @@ -73,16 +73,14 @@ "id": "27d869b6191fa004" }, { - "cell_type": "code", - "execution_count": null, - "outputs": [], + "cell_type": "markdown", "source": [ "## Data from Wikipedia (or any other website)" ], "metadata": { "collapsed": false }, - "id": "2e13909d3eba95cb" + "id": "19d4b0c25a7a8a89" }, { "cell_type": "code", @@ -159,6 +157,16 @@ }, "id": "b276d407b1a723fb" }, + { + "cell_type": "markdown", + "source": [ + "## Run example" + ], + "metadata": { + "collapsed": false + }, + "id": "9442427185ae2a72" + }, { "cell_type": "code", "execution_count": 70, @@ -193,9 +201,23 @@ { "cell_type": "markdown", "source": [ - "## Upload data to WikiData\n", + "## Manual correction\n", "\n", - "The result can be seen at https://www.wikidata.org/wiki/Q51595283" + "The data has now been downloaded to `data/<name>-chatgpt.csv`. It needs to be cleaned and augmented before upload, for example by loading it into OpenRefine and reconciling the `object` column via the WikiData Reconciliation service. Afterward, remove the object-qid column and recreate it via the \"add column based on this column\" function using the `cell.recon.match.id` GREL expression. \n", + "\n", + "Otherwise, you can also just look up the terms and fill out the object-qid column manually. \n", + "\n", + "When done, rename the CSV file by removing the \"-chatgpt\" infix. 
" + ], + "metadata": { + "collapsed": false + }, + "id": "38be8467270ebc58" + }, + { + "cell_type": "markdown", + "source": [ + "## Upload data to WikiData\n" ], "metadata": { "collapsed": false @@ -204,741 +226,95 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 93, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Created (Q51595283)-[P69]-(Q153987)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:19:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:20:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P69]-(Q153006)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:20:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:20:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P69]-(Q766145)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:20:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.1 seconds, 2024-03-15 18:20:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P69]-(Q372608)\n" - ] - }, - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:20:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:21:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 8.7 seconds, 2024-03-15 18:21:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P512]-(Q2091008)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:21:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:21:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:21:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:21:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P512]-(Q752297)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:22:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 
2024-03-15 18:22:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:22:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.0 seconds, 2024-03-15 18:22:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P108]-(Q153987)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:22:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.4 seconds, 2024-03-15 18:22:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:23:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:23:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P108]-(Q124866772)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:23:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:23:34\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:23:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:23:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P108]-(Q2112115)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:24:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:24:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.0 seconds, 2024-03-15 18:24:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P108]-(Q832780)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:24:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:24:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:24:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference 
https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.1 seconds, 2024-03-15 18:25:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P512]-(Q308678)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:25:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:25:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:25:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.0 seconds, 2024-03-15 18:25:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P463]-(Q475602)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:25:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:26:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.2 seconds, 2024-03-15 18:26:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { 
- "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 8.9 seconds, 2024-03-15 18:26:25\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P8413]-(Q1065414)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:26:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added start time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:26:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 8.9 seconds, 2024-03-15 18:26:55\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P1416]-(Q1459361)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:27:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.0 seconds, 2024-03-15 18:27:14\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created (Q51595283)-[P98]-(Q96335163)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:27:24\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.1 seconds, 2024-03-15 18:27:34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created 
(Q65972149)-[P112]-(Q51595283)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:27:44\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://de.wikipedia.org/wiki/Erhard_Blankenburg with access date\n", - "Refining (Q65972149)-[P112]-(Q51595283)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.5 seconds, 2024-03-15 18:27:54\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added end time\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Sleeping for 9.6 seconds, 2024-03-15 18:28:04\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added reference https://www.linkedin.com/in/erhard-blankenburg-63938058/ with access date\n" + "----------\n", + "(Q51595283)-[P69]-(Q153987) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P69]-(Q153987).\n", + "----------\n", + "(Q51595283)-[P69]-(Q153006) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P69]-(Q153006).\n", + "----------\n", + "(Q51595283)-[P69]-(Q766145) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P69]-(Q766145).\n", + "----------\n", + "(Q51595283)-[P69]-(Q372608) exists.\n", + "Time qualifier P582 with value 1965 already exists on (Q51595283)-[P69]-(Q372608).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P69]-(Q372608).\n", + "----------\n", + "(Q51595283)-[P512]-(Q2091008) exists.\n", + "Time qualifier P585 with value 1965 already exists on (Q51595283)-[P512]-(Q2091008).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P512]-(Q2091008).\n", + "----------\n", + "(Q51595283)-[P512]-(Q752297) exists.\n", + 
"Time qualifier P585 with value 1966 already exists on (Q51595283)-[P512]-(Q752297).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P512]-(Q752297).\n", + "----------\n", + "(Q51595283)-[P108]-(Q153987) exists.\n", + "Time qualifier P580 with value 1966 already exists on (Q51595283)-[P108]-(Q153987).\n", + "Time qualifier P582 with value 1968 already exists on (Q51595283)-[P108]-(Q153987).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P108]-(Q153987).\n", + "----------\n", + "(Q51595283)-[P108]-(Q124866772) exists.\n", + "Time qualifier P580 with value 1969 already exists on (Q51595283)-[P108]-(Q124866772).\n", + "Time qualifier P582 with value 1971 already exists on (Q51595283)-[P108]-(Q124866772).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P108]-(Q124866772).\n", + "----------\n", + "(Q51595283)-[P108]-(Q2112115) exists.\n", + "Time qualifier P582 with value 1973 already exists on (Q51595283)-[P108]-(Q2112115).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P108]-(Q2112115).\n", + "----------\n", + "(Q51595283)-[P108]-(Q832780) exists.\n", + "Time qualifier P580 with value 1973 already exists on (Q51595283)-[P108]-(Q832780).\n", + "Time qualifier P582 with value 1974 already exists on (Q51595283)-[P108]-(Q832780).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P108]-(Q832780).\n", + "----------\n", + "(Q51595283)-[P512]-(Q308678) exists.\n", + "Time qualifier P585 with value 1974 already exists on (Q51595283)-[P512]-(Q308678).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P512]-(Q308678).\n", + "----------\n", + "(Q51595283)-[P108]-(Q475602) exists.\n", + "Time qualifier P580 with value 1975 already exists on (Q51595283)-[P108]-(Q475602).\n", + "Time 
qualifier P582 with value 1980 already exists on (Q51595283)-[P108]-(Q475602).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P108]-(Q475602).\n", + "----------\n", + "(Q51595283)-[P8413]-(Q1065414) exists.\n", + "Time qualifier P580 with value 1980 already exists on (Q51595283)-[P8413]-(Q1065414).\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P8413]-(Q1065414).\n", + "----------\n", + "(Q51595283)-[P1416]-(Q1459361) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P1416]-(Q1459361).\n", + "----------\n", + "(Q51595283)-[P98]-(Q96335163) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q51595283)-[P98]-(Q96335163).\n", + "----------\n", + "(Q65972149)-[P112]-(Q51595283) exists.\n", + "Source URL https://de.wikipedia.org/wiki/Erhard_Blankenburg already exists on (Q65972149)-[P112]-(Q51595283).\n", + "----------\n", + "Refining (Q65972149)-[P112]-(Q51595283)\n", + "Time qualifier P582 with value 2003 already exists on (Q65972149)-[P112]-(Q51595283).\n", + "Source URL https://www.linkedin.com/in/erhard-blankenburg-63938058/ already exists on (Q65972149)-[P112]-(Q51595283).\n" ] } ], "source": [ - "# Code written by GPT-4\n", + "# based on code written by GPT-4\n", "import csv\n", - "import pywikibot\n", - "from pywikibot import Claim, WbTime\n", + "from pywikibot import Claim, WbTime, ItemPage, PropertyPage, Site\n", "from datetime import datetime\n", "\n", "def claim_to_string(claim):\n", - " \"\"\"\n", - " Converts a Pywikibot claim to a descriptive string in the format \"(subject QID)-[predicate PID]-(object QID)\".\n", - " \n", - " :param claim: A Pywikibot Claim object\n", - " :return: A string describing the claim in the specified format.\n", - " \"\"\"\n", - " # Subject QID, which is the ID of the item that owns the claim\n", " subject_qid = 
claim.on_item.id\n", - "\n", - " # Predicate PID, which is the property ID of the claim\n", " predicate_pid = claim.getID()\n", - "\n", + " \n", " # Object QID, assuming the target is a Wikidata item\n", " # Note: This simplification assumes the claim's target is an item. \n", " # For other target types (e.g., quantities, strings), additional handling is needed.\n", - " if isinstance(claim.getTarget(), pywikibot.ItemPage):\n", + " if isinstance(claim.getTarget(), ItemPage):\n", " object_qid = claim.getTarget().id\n", " else:\n", " # Placeholder or additional logic for non-ItemPage targets\n", @@ -947,8 +323,101 @@ " return f\"({subject_qid})-[{predicate_pid}]-({object_qid})\"\n", "\n", "\n", + "# Function to check if a specific time qualifier exists\n", + "def time_qualifier_exists(claim, qualifier_pid, year_value):\n", + " for qualifier in claim.qualifiers.get(qualifier_pid, []):\n", + " qualifier_date = qualifier.getTarget()\n", + " if qualifier_date.year == year_value:\n", + " print(f'Time qualifier {qualifier_pid} with value {year_value} already exists on {claim_to_string(claim)}.')\n", + " return True\n", + " return False\n", + "\n", + "\n", + "\n", + "def add_time_qualifiers(repo, claim, start_time, end_time):\n", + " qualifiers = []\n", + " \n", + " if (start_time and end_time) and (start_time == end_time):\n", + " if not time_qualifier_exists(claim, 'P585', int(start_time)):\n", + " point_in_time_qualifier = Claim(repo, 'P585')\n", + " point_in_time_qualifier.setTarget(WbTime(year=int(start_time)))\n", + " claim.addQualifier(point_in_time_qualifier, summary='Adding point in time')\n", + " print(f'Added point_in_time qualifier to {claim_to_string(claim)}')\n", + " qualifiers.append(point_in_time_qualifier)\n", + " \n", + " else: \n", + " if start_time and not time_qualifier_exists(claim, 'P580', int(start_time)):\n", + " start_time_qualifier = Claim(repo, 'P580')\n", + " start_time_qualifier.setTarget(WbTime(year=int(start_time)))\n", + " 
claim.addQualifier(start_time_qualifier, summary='Adding start time')\n", + " print(f'Added start_time qualifier to {claim_to_string(claim)}')\n", + " qualifiers.append(start_time_qualifier)\n", + " \n", + " if end_time and not time_qualifier_exists(claim, 'P582', int(end_time)):\n", + " end_time_qualifier = Claim(repo, 'P582')\n", + " end_time_qualifier.setTarget(WbTime(year=int(end_time)))\n", + " claim.addQualifier(end_time_qualifier, summary='Adding end time')\n", + " print(f'Added end_time qualifier to {claim_to_string(claim)}')\n", + " qualifiers.append(end_time_qualifier)\n", + "\n", + " return qualifiers\n", + "\n", + "# Function to check if a reference with the given URL already exists on the claim\n", + "def reference_url_exists(claim, url):\n", + " for source in claim.getSources():\n", + " if 'P4656' in source or 'P854' in source: # Check both Wikimedia import URL and reference URL\n", + " for prop in source.get('P4656', []) + source.get('P854', []):\n", + " if prop.getTarget() == url:\n", + " print(f'Source URL {url} already exists on {claim_to_string(claim)}.')\n", + " return True\n", + " return False\n", + "\n", + "def qualifier_exists(claim, qualifier_property_id, target):\n", + " for existing_qualifier in claim.qualifiers.get(qualifier_property_id, []):\n", + " if existing_qualifier.getTarget() == target:\n", + " print(f'Qualifier {qualifier_property_id} with value {target.getID()} already exists on {claim_to_string(claim)}.')\n", + " return True\n", + " return False\n", + "\n", + " \n", + "def add_reference(repo, claim, reference_url, retrieved_at_time, qualifiers = None):\n", + " sources=[]\n", + " if reference_url and not reference_url_exists(claim, reference_url):\n", + " # Determine whether the URL is a Wikipedia URL or another type of URL\n", + " property_id = 'P4656' if 'wikipedia.org' in reference_url else 'P854'\n", + " \n", + " # Create the reference claim\n", + " source_claim = Claim(repo, property_id)\n", + " 
source_claim.setTarget(reference_url)\n", + " sources.append(source_claim)\n", + " \n", + " # Create the 'retrieved at' claim\n", + " retrieved_at_claim = Claim(repo, 'P813')\n", + " retrieved_at_target = WbTime(year=retrieved_at_time.year, month=retrieved_at_time.month, day=retrieved_at_time.day)\n", + " retrieved_at_claim.setTarget(retrieved_at_target)\n", + " sources.append(retrieved_at_claim)\n", + " \n", + " # If a qualifier has been passed for which this reference is the source, add it\n", + " if qualifiers:\n", + " for qualifier in qualifiers:\n", + " supports_qualifier_claim = Claim(repo, 'P10551') # \"supports qualifier\"\n", + " site = Site(\"wikidata\", \"wikidata\")\n", + " property_page = PropertyPage(site, qualifier.getID())\n", + " if not qualifier_exists(claim, 'P10551', property_page):\n", + " supports_qualifier_claim.setTarget(property_page)\n", + " sources.append(supports_qualifier_claim)\n", + " \n", + " # Add the references to the claim\n", + " if len(sources) > 0:\n", + " claim.addSources(sources, summary='Adding reference and retrieved at date')\n", + " print(f'Added references to {claim_to_string(claim)}')\n", + " \n", + " return sources\n", + "\n", + "\n", + "# main function\n", "def update_wikidata(file_path):\n", - " site = pywikibot.Site(\"wikidata\", \"wikidata\")\n", + " site = Site(\"wikidata\", \"wikidata\")\n", " repo = site.data_repository()\n", "\n", " previous_object_qid = None\n", @@ -957,6 +426,7 @@ " with open(file_path, newline='', encoding='utf-8') as csvfile:\n", " reader = csv.DictReader(csvfile)\n", " for row in reader:\n", + " print(\"----------\")\n", " subject_qid = row['subject-qid']\n", " pid = row['pid']\n", " object_qid = row['object-qid']\n", @@ -969,7 +439,7 @@ " claim = previous_claim\n", " print(f'Refining {claim_to_string(claim)}')\n", " else:\n", - " item = pywikibot.ItemPage(repo, subject_qid)\n", + " item = ItemPage(repo, subject_qid)\n", " item.get()\n", "\n", " # Check if the claim already exists\n", @@ 
-982,34 +452,17 @@ "\n", " if not claim_exists:\n", " claim = Claim(repo, pid)\n", - " target = pywikibot.ItemPage(repo, object_qid)\n", + " target = ItemPage(repo, object_qid)\n", " claim.setTarget(target)\n", " item.addClaim(claim)\n", " print(f'Created {claim_to_string(claim)}')\n", "\n", - " # Add qualifiers\n", - " if start_time:\n", - " start_time_qualifier = Claim(repo, 'P580')\n", - " start_time_qualifier.setTarget(WbTime(year=int(start_time)))\n", - " claim.addQualifier(start_time_qualifier, summary='Adding start time')\n", - " print(f'Added start time')\n", - "\n", - " if end_time:\n", - " end_time_qualifier = Claim(repo, 'P582')\n", - " end_time_qualifier.setTarget(WbTime(year=int(end_time)))\n", - " claim.addQualifier(end_time_qualifier, summary='Adding end time')\n", - " print(f'Added end time')\n", + " # start_time and end_time\n", + " qualifiers = add_time_qualifiers(repo, claim, start_time, end_time)\n", "\n", - " # Add reference\n", - " if reference_url:\n", - " source_claim = Claim(repo, 'P4656' if 'wikipedia' in reference_url else 'P854')\n", - " source_claim.setTarget(reference_url)\n", - " # add retrieved at\n", - " retrieved_at_claim = Claim(repo, 'P813')\n", - " retrieved_at_claim.setTarget(WbTime(year=datetime.utcnow().year, month=datetime.utcnow().month, day=datetime.utcnow().day))\n", - " source_claim.addQualifier(retrieved_at_claim)\n", - " claim.addSources([source_claim], summary='Adding reference')\n", - " print(f\"Added reference {reference_url} with access date\")\n", + " # references\n", + " retrieved_at_time = datetime.utcnow()\n", + " add_reference(repo, claim, reference_url, retrieved_at_time, qualifiers)\n", "\n", " # Remember the object and claim for the next iteration\n", " previous_object_qid = object_qid\n", @@ -1020,21 +473,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-03-15T17:28:14.769604100Z", - "start_time": "2024-03-15T17:19:52.058378600Z" + "end_time": "2024-03-16T19:05:35.228905800Z", + 
"start_time": "2024-03-16T19:05:29.463025100Z" } }, "id": "bdb602fb42b562df" }, { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [], + "cell_type": "markdown", + "source": [ + "The result can be seen at https://www.wikidata.org/wiki/Q51595283" + ], "metadata": { "collapsed": false }, - "id": "63d5f8107f4e3fa6" + "id": "b7757fcf6ea66320" } ], "metadata": { diff --git a/wikidata/data/Erhard Blankenburg.csv b/wikidata/data/Erhard Blankenburg.csv index 72ee157e7cf52516dbe341f6241d5ae6a3ca810b..9da5ad4e6e0a2d18fa626f821e0c6b58e00dbd04 100644 --- a/wikidata/data/Erhard Blankenburg.csv +++ b/wikidata/data/Erhard Blankenburg.csv @@ -10,7 +10,7 @@ Erhard Blankenburg,Q51595283,employer,P108,Quickborner Team,Q124866772,1969,1971 Erhard Blankenburg,Q51595283,employer,P108,Prognos AG,Q2112115,,1973,https://de.wikipedia.org/wiki/Erhard_Blankenburg Erhard Blankenburg,Q51595283,employer,P108,Max Planck Institute for Foreign and International Criminal Law,Q832780,1973,1974,https://de.wikipedia.org/wiki/Erhard_Blankenburg Erhard Blankenburg,Q51595283,academic degree,P512,habilitation,Q308678,1974,1974,https://de.wikipedia.org/wiki/Erhard_Blankenburg -Erhard Blankenburg,Q51595283,employed by,P463,WZB Berlin Social Science Center,Q475602,1975,1980,https://de.wikipedia.org/wiki/Erhard_Blankenburg +Erhard Blankenburg,Q51595283,employer,P108,WZB Berlin Social Science Center,Q475602,1975,1980,https://de.wikipedia.org/wiki/Erhard_Blankenburg Erhard Blankenburg,Q51595283,academic appointment,P8413,Free University of Amsterdam,Q1065414,1980,,https://de.wikipedia.org/wiki/Erhard_Blankenburg Erhard Blankenburg,Q51595283,affiliated with,P1416,International Institute for the Sociology of Law,Q1459361,,,https://de.wikipedia.org/wiki/Erhard_Blankenburg Erhard Blankenburg,Q51595283,editor,P98,Zeitschrift fur Rechtssoziologie,Q96335163,,,https://de.wikipedia.org/wiki/Erhard_Blankenburg