diff --git a/corpus-creation/.env.dist b/corpus-creation/.env.dist
new file mode 100644
index 0000000000000000000000000000000000000000..f4b045dae6259fed99fc361c95d495cf95e88bda
--- /dev/null
+++ b/corpus-creation/.env.dist
@@ -0,0 +1,2 @@
+EZPROXY_USER=
+EZPROXY_PASS=
\ No newline at end of file
diff --git a/corpus-creation/.gitignore b/corpus-creation/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..2eea525d885d5148108f6f3a9a8613863f783d36
--- /dev/null
+++ b/corpus-creation/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/corpus-creation/download-journal-corpus.ipynb b/corpus-creation/download-journal-corpus.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2a59e70a273242b8814c76ac310655e30775a0cb
--- /dev/null
+++ b/corpus-creation/download-journal-corpus.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-03-22T21:42:32.983419Z",
+     "start_time": "2024-03-22T21:42:31.435315Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://www-jstor-org.ezproxy.lhlt.mpg.de/stable/pdf/20805575.pdf\n",
+      "{'Server': 'Varnish', 'Retry-After': '0', 'Content-Type': '', 'Date': 'Fri, 22 Mar 2024 21:42:32 GMT', 'Via': '1.1 varnish', 'X-Served-By': 'cache-fra-eddf8230065-FRA', 'X-Cache': 'MISS', 'X-Cache-Hits': '0', 'Accept-Ranges': 'none', 'Connection': 'close'}\n"
+     ]
+    },
+    {
+     "ename": "Exception",
+     "evalue": "Unexpected response of type ",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mException\u001B[0m                                 Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[4], line 65\u001B[0m\n\u001B[1;32m     61\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnexpected response of type \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcontent_type\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m     63\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m size\n\u001B[0;32m---> 65\u001B[0m download_via_ezproxy(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhttps://www.jstor.org/stable/pdf/20805575.pdf\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mout/20805575.pdf\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
+      "Cell \u001B[0;32mIn[4], line 61\u001B[0m, in \u001B[0;36mdownload_via_ezproxy\u001B[0;34m(url, file_path)\u001B[0m\n\u001B[1;32m     59\u001B[0m             f\u001B[38;5;241m.\u001B[39mwrite(chunk)\n\u001B[1;32m     60\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m---> 61\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnexpected response of type \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcontent_type\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m     63\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m size\n",
+      "\u001B[0;31mException\u001B[0m: Unexpected response of type "
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import re\n",
+    "import requests\n",
+    "from urllib.parse import urlencode#\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv()  # load variables from a local .env file into the environment\n",
+    "\n",
+    "# Configure session to handle cookies\n",
+    "session = requests.Session()\n",
+    "\n",
+    "# Credentials come from the environment (.env.dist lists the expected names);\n",
+    "# fail early rather than posting empty values or the literal string \"None\"\n",
+    "EZPROXY_USER = os.getenv(\"EZPROXY_USER\")\n",
+    "EZPROXY_PASS = os.getenv(\"EZPROXY_PASS\")\n",
+    "if not (EZPROXY_USER and EZPROXY_PASS):\n",
+    "    raise RuntimeError(\"EZPROXY_USER and EZPROXY_PASS must be set (see .env.dist)\")\n",
+    "\n",
+    "# Login endpoint (the target URL is appended after qurl=) and the login form fields\n",
+    "ezproxy_url_prefix = \"https://login.ezproxy.lhlt.mpg.de/login?qurl=\"\n",
+    "params = {\"user\": EZPROXY_USER, \"pass\": EZPROXY_PASS, \"login\": \"Login\"}\n",
+    "\n",
+    "# Form fields pre-encoded as a query string\n",
+    "params_encoded = urlencode(params)\n",
+    "\n",
+    "def download_via_ezproxy(url, file_path):\n",
+    "    \"\"\"Log in to EZproxy, save the PDF behind `url` to `file_path` and return its size in bytes.\n",
+    "\n",
+    "    Raises Exception when access is denied, when an HTML page has no document link,\n",
+    "    on an HTTP error status, or when the final response is not a PDF.\n",
+    "    \"\"\"\n",
+    "    from urllib.parse import quote  # local import keeps this function self-contained\n",
+    "    # `qurl=` expects a percent-encoded target; a dict body is sent form-encoded with its header\n",
+    "    res = session.post(ezproxy_url_prefix + quote(url, safe=\"\"), data=params)\n",
+    "    res = session.get(res.url)  # Re-fetch the URL the login request ended on\n",
+    "    # A missing Content-Type header counts as an empty type instead of raising KeyError\n",
+    "    content_type = res.headers.get(\"Content-Type\", \"\").split(\";\")[0].strip()\n",
+    "\n",
+    "    if content_type == \"text/html\":\n",
+    "        # Check for access restrictions or find the real document URL\n",
+    "        page = res.text\n",
+    "        if \"You currently have no access\" in page:\n",
+    "            raise Exception(\"No access\")\n",
+    "        match = re.search(r'click \\<a href=\"([^\"]+)\"', page)\n",
+    "        if not match:\n",
+    "            os.makedirs(\"out\", exist_ok=True)  # keep the page for inspection\n",
+    "            with open(\"out/invalid-response.html\", \"w\", encoding=\"utf-8\") as f:\n",
+    "                f.write(page)\n",
+    "            raise Exception(\"Invalid html response\")\n",
+    "        # Refetch from the new URL\n",
+    "        url = match.group(1)\n",
+    "        print(url)\n",
+    "        res = session.get(url)\n",
+    "        content_type = res.headers.get(\"Content-Type\", \"\").split(\";\")[0].strip()\n",
+    "\n",
+    "    if not res.ok:\n",
+    "        raise Exception(f\"HTTP {res.status_code} for {res.url}\")\n",
+    "    if content_type != \"application/pdf\":\n",
+    "        raise Exception(f\"Unexpected response of type {content_type}\")\n",
+    "    os.makedirs(os.path.dirname(file_path) or \".\", exist_ok=True)\n",
+    "    with open(file_path, \"wb\") as f:\n",
+    "        # The body is already fully downloaded (no stream=True), so write it in one call\n",
+    "        return f.write(res.content)\n",
+    "\n",
+    "download_via_ezproxy(\"https://www.jstor.org/stable/pdf/20805575.pdf\", \"out/20805575.pdf\")  # example run: fetch one JSTOR PDF into out/\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "9bc974e45a72e7a"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/wikidata/scholars-de.ipynb b/wikidata/scholars-de.ipynb
index deb1ec946adab91b5a8da3553fc333fcdb5c7791..da5bc724de0cba3cca17f50a68ec8c9d34638bff 100644
--- a/wikidata/scholars-de.ipynb
+++ b/wikidata/scholars-de.ipynb
@@ -105,6 +105,7 @@
     "Ralf Poscher (Q2129347)\n",
     "Susanne Baer (Q101872)\n",
     "Gralf-Peter Calliess (Q1542033)\n",
+    "Rolf Bender (Q59533437)\n",
     "\"\"\".split(\"\\n\")\n",
     "\n",
     "from lib.wikidata import get_person_info_from_wikidata\n",