From 7753042b0c88fbccf52f13aaf4c923ba0d6b6da8 Mon Sep 17 00:00:00 2001 From: Christian Boulanger <boulanger@lhlt.mpg.de> Date: Mon, 15 Jan 2024 10:15:54 +0100 Subject: [PATCH] update notebooks --- .../identify-editors-with-llm.ipynb | 2 +- rocket-chat-api/rocket-chat-api.ipynb | 34 ++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/german-law-journal-network/identify-editors-with-llm.ipynb b/german-law-journal-network/identify-editors-with-llm.ipynb index 2f53234..e9c1452 100644 --- a/german-law-journal-network/identify-editors-with-llm.ipynb +++ b/german-law-journal-network/identify-editors-with-llm.ipynb @@ -108,7 +108,7 @@ "journal_name = \"Kritische Vierteljahresschrift für Gesetzgebung und Rechtswissenschaft\"\n", "instruction = f\"Finde im folgenden Text die Herausgeber, Redaktion/Schriftleitung und Beirat der Zeitschrift '{journal_name}' und gebe sie im CSV-Format zurück mit den Spalten 'lastname', 'firstname', 'title', 'position', 'affiliation','role'. Die Spalte 'role' enthält entweder 'Herausgeber', 'Redaktion', 'Beirat', 'Schriftleitung' oder ist leer wenn nicht bestimmbar. Wenn keine passenden Informationen verfügbar sind, gebe nur den CSV-Header zurück. Setze alle Werte in den CSV-Spalten in Anführungszeichen.\"\n", "\n", - "google_query = f'{journal_name} (herausgeber | redaktion | beirat)'\n", + "google_query = f'{journal_name} intext:herausgeber|herausgegeben|redakt|schriftleit|beirat'\n", "urls = run_google_search(google_query, lang=\"de\", exclude=['jstor.org'], num_results=1)\n", "website_data = download(urls[0]) # get the content of the first website found\n", "# compare performance of different GPT models\n", diff --git a/rocket-chat-api/rocket-chat-api.ipynb b/rocket-chat-api/rocket-chat-api.ipynb index 271243a..4b97602 100644 --- a/rocket-chat-api/rocket-chat-api.ipynb +++ b/rocket-chat-api/rocket-chat-api.ipynb @@ -2,13 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 27, + "execution_count": 2, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2024-01-14T21:49:59.779267900Z", - "start_time": "2024-01-14T21:49:59.447386400Z" + "end_time": "2024-01-15T09:15:30.953499600Z", + "start_time": "2024-01-15T09:15:30.602210400Z" } }, "outputs": [ @@ -16,7 +16,7 @@ "data": { "text/plain": "[('Note-to-self', '6123583208648d50d46136e7', 'Grib6LkDEh8BjXm5A'),\n ('SLT-Team', '621f3257dda518ec820fec0f', 'DLX2YQYzmwSefCFSs'),\n ('Arbeitsaufträge-SHK-Abt-3',\n '6231a585396730692a6254af',\n 'h2F4wES7ALisyjFeS'),\n ('Machine-Learning-Team', '6243f60f3bb97e25a63651be', 'sibAdkwHQzqJChxH9'),\n ('mpilhlt-DH', '6246b44404cc8d8ba31517c2', 'yT4DM9pje2Cbnz7w3'),\n ('reference-extraction-workshops-2023',\n '636a209aa7ef138927c64671',\n 'CXECckP3iJcT7Wzvm'),\n ('Literaturverwaltung', '642ab2edd4f482c368a7e364', 'ALzasJ7iwLpNzE34B'),\n ('Arbeitsgruppe-Gleichstellungsbericht',\n '6492f49d7821ad51fcbbef97',\n '6492f49e7821ad51fcbbef99'),\n ('mpilhlt-Auer-Doctoral-Students-plus-Coordinator',\n '64b50afdf6d0cde3515015c3',\n '64b50afdf6d0cde3515015c5'),\n ('DH-Projekte-Abt-Auer',\n '655cd77374c1eb212615898e',\n '655cd77374c1eb2126158990')]" }, - "execution_count": 27, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -56,19 +56,19 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "outputs": [ { "ename": "HTTPError", - "evalue": "400 Client Error: Bad Request for url: https://chat.gwdg.de/api/v1/channels.history?roomId=6243f60f3bb97e25a63651be", + "evalue": "400 Client Error: Bad Request for url: https://chat.gwdg.de/api/v1/channels.history?roomId=sibAdkwHQzqJChxH9", "output_type": "error", "traceback": [ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[1;31mHTTPError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[29], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m msgs \u001B[38;5;241m=\u001B[39m \u001B[43mcall_rocket_chat_api\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mchannels.history\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mroomId\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m6243f60f3bb97e25a63651be\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmessages\u001B[39m\u001B[38;5;124m'\u001B[39m)\n", - "Cell \u001B[1;32mIn[27], line 15\u001B[0m, in \u001B[0;36mcall_rocket_chat_api\u001B[1;34m(command, **params)\u001B[0m\n\u001B[0;32m 10\u001B[0m headers \u001B[38;5;241m=\u001B[39m {\n\u001B[0;32m 11\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mX-Auth-Token\u001B[39m\u001B[38;5;124m'\u001B[39m: os\u001B[38;5;241m.\u001B[39mgetenv(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mROCKETCHAT_API_KEY\u001B[39m\u001B[38;5;124m'\u001B[39m),\n\u001B[0;32m 12\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mX-User-Id\u001B[39m\u001B[38;5;124m'\u001B[39m: os\u001B[38;5;241m.\u001B[39mgetenv(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mROCKETCHAT_USER_ID\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m 13\u001B[0m }\n\u001B[0;32m 14\u001B[0m response \u001B[38;5;241m=\u001B[39m requests\u001B[38;5;241m.\u001B[39mget(url, headers\u001B[38;5;241m=\u001B[39mheaders, params\u001B[38;5;241m=\u001B[39mparams)\n\u001B[1;32m---> 15\u001B[0m \u001B[43mresponse\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mraise_for_status\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 16\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 17\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\u001B[38;5;241m.\u001B[39mjson()\n", - "File \u001B[1;32m~\\anaconda3\\lib\\site-packages\\requests\\models.py:1021\u001B[0m, in \u001B[0;36mResponse.raise_for_status\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 1016\u001B[0m http_error_msg \u001B[38;5;241m=\u001B[39m (\n\u001B[0;32m 1017\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstatus_code\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m Server Error: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreason\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m for url: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39murl\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 1018\u001B[0m )\n\u001B[0;32m 1020\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m http_error_msg:\n\u001B[1;32m-> 1021\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m HTTPError(http_error_msg, response\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m)\n", - "\u001B[1;31mHTTPError\u001B[0m: 400 Client Error: Bad Request for url: https://chat.gwdg.de/api/v1/channels.history?roomId=6243f60f3bb97e25a63651be" + "Cell \u001B[1;32mIn[3], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m msgs \u001B[38;5;241m=\u001B[39m call_rocket_chat_api(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mchannels.history\u001B[39m\u001B[38;5;124m'\u001B[39m, roomId\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124msibAdkwHQzqJChxH9\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmessages\u001B[39m\u001B[38;5;124m'\u001B[39m)\n", + "Cell \u001B[1;32mIn[2], line 15\u001B[0m, in \u001B[0;36mcall_rocket_chat_api\u001B[1;34m(command, **params)\u001B[0m\n\u001B[0;32m 10\u001B[0m headers \u001B[38;5;241m=\u001B[39m {\n\u001B[0;32m 11\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mX-Auth-Token\u001B[39m\u001B[38;5;124m'\u001B[39m: os\u001B[38;5;241m.\u001B[39mgetenv(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mROCKETCHAT_API_KEY\u001B[39m\u001B[38;5;124m'\u001B[39m),\n\u001B[0;32m 12\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mX-User-Id\u001B[39m\u001B[38;5;124m'\u001B[39m: os\u001B[38;5;241m.\u001B[39mgetenv(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mROCKETCHAT_USER_ID\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m 13\u001B[0m }\n\u001B[0;32m 14\u001B[0m response \u001B[38;5;241m=\u001B[39m requests\u001B[38;5;241m.\u001B[39mget(url, headers\u001B[38;5;241m=\u001B[39mheaders, params\u001B[38;5;241m=\u001B[39mparams)\n\u001B[1;32m---> 15\u001B[0m response\u001B[38;5;241m.\u001B[39mraise_for_status()\n\u001B[0;32m 16\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 17\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\u001B[38;5;241m.\u001B[39mjson()\n", + "File \u001B[1;32m~\\AppData\\Local\\miniconda3\\Lib\\site-packages\\requests\\models.py:1021\u001B[0m, in \u001B[0;36mResponse.raise_for_status\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 1016\u001B[0m http_error_msg \u001B[38;5;241m=\u001B[39m (\n\u001B[0;32m 1017\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstatus_code\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m Server Error: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mreason\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m for url: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39murl\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 1018\u001B[0m )\n\u001B[0;32m 1020\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m http_error_msg:\n\u001B[1;32m-> 1021\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m HTTPError(http_error_msg, response\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m)\n", + "\u001B[1;31mHTTPError\u001B[0m: 400 Client Error: Bad Request for url: https://chat.gwdg.de/api/v1/channels.history?roomId=sibAdkwHQzqJChxH9" ] } ], @@ -79,11 +79,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-01-14T21:51:18.225934200Z", - "start_time": "2024-01-14T21:51:17.994275Z" + "end_time": "2024-01-15T09:15:37.298747900Z", + "start_time": "2024-01-15T09:15:36.819053300Z" } }, "id": "709f6d8c845a0056" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "caee699ae846a8fc" } ], "metadata": { -- GitLab