From 167d0d5cf242a2786a40fcb7d6cf75c97b8d6d14 Mon Sep 17 00:00:00 2001 From: Christian Boulanger <info@bibliograph.org> Date: Mon, 4 Mar 2024 16:08:08 +0100 Subject: [PATCH] commit changes to langchain-experiments --- langchain-experiments/compare-models.ipynb | 249 ++++++++++++------ .../data/input/journal-website.txt | 80 +++--- langchain-experiments/readme.md | 5 +- 3 files changed, 212 insertions(+), 122 deletions(-) diff --git a/langchain-experiments/compare-models.ipynb b/langchain-experiments/compare-models.ipynb index d6f8a30..27a2534 100644 --- a/langchain-experiments/compare-models.ipynb +++ b/langchain-experiments/compare-models.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "outputs": [], "source": [ "import io\n", @@ -46,17 +46,10 @@ " | StrOutputParser()\n", " )\n", " return response_to_df(chain.invoke(params))\n", - "\n", - "with open('data/input/journal-website.txt', encoding='utf-8') as f:\n", - " website_text = f.read()\n", - "journal_name = \"AUR - Agrar- und Umweltrecht\"" + "\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-01-17T19:43:14.633251500Z", - "start_time": "2024-01-17T19:43:07.158102900Z" - } + "collapsed": false }, "id": "e46d0648c1c6c96a" }, @@ -82,17 +75,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "outputs": [], "source": [ + "with open('data/input/journal-website.txt', encoding='utf-8') as f:\n", + " website_text = f.read()\n", + "journal_name = \"AUR - Agrar- und Umweltrecht\"\n", + "\n", "template = \"\"\"\n", "In the following German text, which was scraped from a website, find the members of the editorial board or the advisory board of the journal '{journal_name}' as per the following rules:\n", "- In German, typical labels for these roles are \"Herausgeber\", \"Redaktion/Redakteur/Schriftleitung\" and \"Beirat\". \n", "- Return the data as comma-separated values, which can be saved to a `.csv` file. 
Put all values in the CSV rows in quotes. \n", "- The CSV data must have the columns 'lastname', 'firstname', 'title', 'position', 'affiliation','role'. \n", "- The column 'role' must contain either 'Herausgeber', 'Redaktion', 'Beirat' or is empty. Leave the column empty if you cannot determine the role. Use 'Redaktion' for the \"Schriftleitung\" role.\n", - "- The column 'title' should contain academic titles such as \"Dr.\" or \"Prof. Dr.\"\n", - "- The column 'position' should contain the job title\n", + "- The column 'title' should contain academic titles, such as \"Dr.\" or \"Prof. Dr.\"\n", + "- The column 'position' should contain the job title, typically \"Rechtsanwalt\", \"Regierungsrat\" or \"Richter am Oberlandesgericht\"\n", "- The column 'affiliation' contains the institution or organization the person belongs to, or the city if one is mentioned\n", "- If the journal is published (\"herausgeben von\") by an association, institute or other organization, but its name in the column 'lastname'. \n", "- If you cannot find any information, simply return the CSV header. \n", @@ -102,11 +99,7 @@ "\"\"\"" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-01-17T20:46:02.624154500Z", - "start_time": "2024-01-17T20:46:02.598111600Z" - } + "collapsed": false }, "id": "23aef80911796078" }, @@ -125,18 +118,8 @@ }, { "cell_type": "code", - "execution_count": 3, - "outputs": [ - { - "data": { - "text/plain": " lastname firstname title \\\n0 DGAR NaN NaN \n1 Busse Christian Dr. \n2 Endres Ewald Prof. Dr. \n3 Francois Matthias Dr. \n4 von Garmissen Bernd Dr. \n5 Glas Ingo NaN \n6 Graß Christiane NaN \n7 Haarstrich Jens NaN \n8 Koch Erich Dr. \n9 Köpl Christian Dr. \n10 Martinez Jose Prof. Dr. \n11 Nies Volkmar NaN \n12 Stephany Ralf NaN \n13 Wedemeyer Harald NaN \n14 Martinez José Prof. Dr. 
\n15 Nies Volkmar LLD \n\n position \\\n0 Deutsche Gesellschaft für Agrarrecht \n1 Regierungsdirektor \n2 NaN \n3 Rechtsanwalt \n4 Rechtsanwalt \n5 Rechtsanwalt \n6 Rechtsanwältin \n7 Rechtsanwalt \n8 Ltd. Verwaltungsdirektor \n9 Ministerialrat \n10 NaN \n11 Ltd. Landwirtschaftsdirektor \n12 Rechtsanwalt/Steuerberater \n13 Rechtsanwalt \n14 Erster Schriftleiter \n15 Zweiter Schriftleiter \n\n affiliation role \n0 NaN Herausgeber \n1 Bundesministerium für Ernährung und Landwirtsc... Redaktion \n2 Hochschule Weihenstephan-Triesdorf, Freising Redaktion \n3 Bitburg Redaktion \n4 Göttingen Redaktion \n5 Rostock Redaktion \n6 Bonn Redaktion \n7 Peine Redaktion \n8 Sozialversicherung für Landwirtschaft, Forsten... Redaktion \n9 Bayerisches Staatsministerium für Ernährung, L... Redaktion \n10 Institut für Landwirtschaftsrecht, Georg-Augus... Redaktion \n11 Landwirtschaftskammer NRW, Bonn Redaktion \n12 Bonn Redaktion \n13 Landvolk Niedersachsen, Hannover Redaktion \n14 Institut für Landwirtschaftsrecht, Göttingen Redaktion \n15 50170 Kerpen Redaktion ", - "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lastname</th>\n <th>firstname</th>\n <th>title</th>\n <th>position</th>\n <th>affiliation</th>\n <th>role</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>DGAR</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>Deutsche Gesellschaft für Agrarrecht</td>\n <td>NaN</td>\n <td>Herausgeber</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Busse</td>\n <td>Christian</td>\n <td>Dr.</td>\n <td>Regierungsdirektor</td>\n <td>Bundesministerium für Ernährung und Landwirtsc...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Endres</td>\n <td>Ewald</td>\n <td>Prof. 
Dr.</td>\n <td>NaN</td>\n <td>Hochschule Weihenstephan-Triesdorf, Freising</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Francois</td>\n <td>Matthias</td>\n <td>Dr.</td>\n <td>Rechtsanwalt</td>\n <td>Bitburg</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>4</th>\n <td>von Garmissen</td>\n <td>Bernd</td>\n <td>Dr.</td>\n <td>Rechtsanwalt</td>\n <td>Göttingen</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Glas</td>\n <td>Ingo</td>\n <td>NaN</td>\n <td>Rechtsanwalt</td>\n <td>Rostock</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Graß</td>\n <td>Christiane</td>\n <td>NaN</td>\n <td>Rechtsanwältin</td>\n <td>Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Haarstrich</td>\n <td>Jens</td>\n <td>NaN</td>\n <td>Rechtsanwalt</td>\n <td>Peine</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Koch</td>\n <td>Erich</td>\n <td>Dr.</td>\n <td>Ltd. Verwaltungsdirektor</td>\n <td>Sozialversicherung für Landwirtschaft, Forsten...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Köpl</td>\n <td>Christian</td>\n <td>Dr.</td>\n <td>Ministerialrat</td>\n <td>Bayerisches Staatsministerium für Ernährung, L...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Martinez</td>\n <td>Jose</td>\n <td>Prof. Dr.</td>\n <td>NaN</td>\n <td>Institut für Landwirtschaftsrecht, Georg-Augus...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Nies</td>\n <td>Volkmar</td>\n <td>NaN</td>\n <td>Ltd. Landwirtschaftsdirektor</td>\n <td>Landwirtschaftskammer NRW, Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Stephany</td>\n <td>Ralf</td>\n <td>NaN</td>\n <td>Rechtsanwalt/Steuerberater</td>\n <td>Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Wedemeyer</td>\n <td>Harald</td>\n <td>NaN</td>\n <td>Rechtsanwalt</td>\n <td>Landvolk Niedersachsen, Hannover</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Martinez</td>\n <td>José</td>\n <td>Prof. 
Dr.</td>\n <td>Erster Schriftleiter</td>\n <td>Institut für Landwirtschaftsrecht, Göttingen</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Nies</td>\n <td>Volkmar</td>\n <td>LLD</td>\n <td>Zweiter Schriftleiter</td>\n <td>50170 Kerpen</td>\n <td>Redaktion</td>\n </tr>\n </tbody>\n</table>\n</div>" - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "model = ChatOpenAI(model_name=\"gpt-4\")\n", "df = use_model(model, template, journal_name=journal_name, website_text=website_text)\n", @@ -144,11 +127,7 @@ "df" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-01-17T19:43:51.821749100Z", - "start_time": "2024-01-17T19:43:22.643583400Z" - } + "collapsed": false }, "id": "initial_id" }, @@ -167,18 +146,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "outputs": [ - { - "data": { - "text/plain": " lastname firstname title \\\n0 Busse Christian Dr. \n1 Endres Ewald Prof. Dr. \n2 Francois Matthias Dr. \n3 von Garmissen Bernd Dr. \n4 Glas Ingo NaN \n5 Graß Christiane Rechtsanwältin \n6 Haarstrich Jens Rechtsanwalt \n7 Koch Erich Dr. \n8 Köpl Christian Dr. \n9 Martinez Jose Prof. Dr. \n10 Nies Volkmar Ltd. Landwirtschaftsdirektor \n11 Stephany Ralf Rechtsanwalt/Steuerberater \n12 Wedemeyer Harald Rechtsanwalt \n\n position \\\n0 Regierungsdirektor \n1 NaN \n2 Rechtsanwalt \n3 Rechtsanwalt \n4 Rechtsanwalt \n5 NaN \n6 NaN \n7 Ltd. Verwaltungsdirektor \n8 Ministerialrat \n9 NaN \n10 NaN \n11 NaN \n12 NaN \n\n affiliation role \n0 Bundesministerium für Ernährung und Landwirtsc... Redaktion \n1 Hochschule Weihenstephan-Triesdorf, Freising Redaktion \n2 Bitburg Redaktion \n3 Göttingen Redaktion \n4 Rostock Redaktion \n5 Bonn Redaktion \n6 Peine Redaktion \n7 Sozialversicherung für Landwirtschaft, Forsten... Redaktion \n8 Bayerisches Staatsministerium für Ernährung, L... Redaktion \n9 Institut für Landwirtschaftsrecht, Georg-Augus... 
Redaktion \n10 Landwirtschaftskammer NRW, Bonn Redaktion \n11 Bonn Redaktion \n12 Landvolk Niedersachsen, Hannover Redaktion ", - "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lastname</th>\n <th>firstname</th>\n <th>title</th>\n <th>position</th>\n <th>affiliation</th>\n <th>role</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Busse</td>\n <td>Christian</td>\n <td>Dr.</td>\n <td>Regierungsdirektor</td>\n <td>Bundesministerium für Ernährung und Landwirtsc...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Endres</td>\n <td>Ewald</td>\n <td>Prof. Dr.</td>\n <td>NaN</td>\n <td>Hochschule Weihenstephan-Triesdorf, Freising</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Francois</td>\n <td>Matthias</td>\n <td>Dr.</td>\n <td>Rechtsanwalt</td>\n <td>Bitburg</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>3</th>\n <td>von Garmissen</td>\n <td>Bernd</td>\n <td>Dr.</td>\n <td>Rechtsanwalt</td>\n <td>Göttingen</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Glas</td>\n <td>Ingo</td>\n <td>NaN</td>\n <td>Rechtsanwalt</td>\n <td>Rostock</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Graß</td>\n <td>Christiane</td>\n <td>Rechtsanwältin</td>\n <td>NaN</td>\n <td>Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Haarstrich</td>\n <td>Jens</td>\n <td>Rechtsanwalt</td>\n <td>NaN</td>\n <td>Peine</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Koch</td>\n <td>Erich</td>\n <td>Dr.</td>\n <td>Ltd. 
Verwaltungsdirektor</td>\n <td>Sozialversicherung für Landwirtschaft, Forsten...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Köpl</td>\n <td>Christian</td>\n <td>Dr.</td>\n <td>Ministerialrat</td>\n <td>Bayerisches Staatsministerium für Ernährung, L...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Martinez</td>\n <td>Jose</td>\n <td>Prof. Dr.</td>\n <td>NaN</td>\n <td>Institut für Landwirtschaftsrecht, Georg-Augus...</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Nies</td>\n <td>Volkmar</td>\n <td>Ltd. Landwirtschaftsdirektor</td>\n <td>NaN</td>\n <td>Landwirtschaftskammer NRW, Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Stephany</td>\n <td>Ralf</td>\n <td>Rechtsanwalt/Steuerberater</td>\n <td>NaN</td>\n <td>Bonn</td>\n <td>Redaktion</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Wedemeyer</td>\n <td>Harald</td>\n <td>Rechtsanwalt</td>\n <td>NaN</td>\n <td>Landvolk Niedersachsen, Hannover</td>\n <td>Redaktion</td>\n </tr>\n </tbody>\n</table>\n</div>" - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n", "df = use_model(model, template, journal_name=journal_name, website_text=website_text)\n", @@ -186,11 +155,7 @@ "df" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-01-17T19:46:04.139744300Z", - "start_time": "2024-01-17T19:45:46.825487Z" - } + "collapsed": false }, "id": "e1aedc5ef3cab564" }, @@ -219,17 +184,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "outputs": [ - { - "data": { - "text/plain": "['Martinez, Dr. Christian Busse, Bundesministerium für Ernährung und Landwirtschaft, Bonn Agrarprodukt Recht',\n 'Prof. Dr. 
Ewald Endres, Hochschule Weihenstephan-Triesdorf Freising Forsting Forsting, Jagd, Fischerei, Fischerei',\n 'Lawyeranwalt Ingo Glas, Bitburg Boden Recht',\n 'Christiane Grass, Bonn Agrarzivil Recht',\n 'Jens Haarstrich, Peine Redaktionär, Rostock',\n 'Prof. Dr. Bernd von Garmissen, Göttingen Erb, Redaktion, Umwelt',\n 'Ltdr. Jose Martinez, Georg-August-Universität Göttingen, Göttingen',\n '',\n '',\n \"Note: The column 'Role' contains the following values: 'Herausgeber', 'Redaktion', 'Beirat'\"]" - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "from lib.hf_llama2_chat_gptq import query\n", "llama2_template = f\"<s>[INST] <<SYS>>You are a helpful assistant. No comments or explanation, just answer the question.<</SYS>>{template}[/INST]\"\n", @@ -238,11 +194,7 @@ "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")\n" ], "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-01-17T19:31:23.221568600Z", - "start_time": "2024-01-17T19:31:16.694648400Z" - } + "collapsed": false }, "id": "f05098a4cf2aa3dc" }, @@ -251,7 +203,7 @@ "source": [ "## TheBloke/Llama-2-70B-chat-GPTQ via Huggingface Inference Endpoint\n", "\n", - "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't the academic titles right. It also cannot be persuaded to [not comment on the CSV output](data/output/editors-llama-2-70b-chat-gptq.txt). Given that the model costs $13/h to run, the result is not impressive." + "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't get the academic titles right. It also cannot be persuaded to [not comment on the CSV output](data/output/editors-llama-2-70b-chat-gptq.txt)."
], "metadata": { "collapsed": false @@ -260,29 +212,164 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, + "outputs": [], + "source": [ + "endpoint_url = \"https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")" + ], + "metadata": { + "collapsed": false + }, + "id": "b94cf62b996bf3a2" + }, + { + "cell_type": "markdown", + "source": [ + "## mixtral-8x7b-instruct-v0-1-puk" + ], + "metadata": { + "collapsed": false + }, + "id": "5b9b6d23dcdbbdd6" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from lib.hf_llama2_chat_gptq import query\n", + "llama2_template = f\"<s>[INST] <<SYS>>You are a helpful assistant. You answer the question without any further comments or explanation.<</SYS>>{template}[/INST]\"  # NOTE(review): built here but query() below is called with `template` - confirm which one is intended\n", + "\n", + "endpoint_url = \"https://pmxm9cba6f8uvi9s.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")" + ], + "metadata": { + "collapsed": false + }, + "id": "a5eb505982b3aafe" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint\n", + "\n", + "ENDPOINT_URL = \"https://pmxm9cba6f8uvi9s.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=ENDPOINT_URL,\n", + " task=\"text-generation\",\n", + " model_kwargs={\n", + " \"max_new_tokens\": 512,\n", + " \"top_k\": 50,\n", + " \"temperature\": 0.1,\n", + " \"repetition_penalty\": 1.03,\n", + " },\n", + ")" + ], + "metadata": { + "collapsed": false + }, + "id": "9d5b65beac1f863e" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n", + "import os\n", + "\n", + "from langchain_community.llms import HuggingFaceTextGenInference\n", + "\n", +
"ENDPOINT_URL = \"https://pmxm9cba6f8uvi9s.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "HF_TOKEN = os.environ[\"HUGGINGFACEHUB_API_TOKEN\"]  # read from the environment (see readme.md); never commit access tokens\n", + "\n", + "llm = HuggingFaceTextGenInference(\n", + " inference_server_url=ENDPOINT_URL,\n", + " max_new_tokens=512,\n", + " top_k=50,\n", + " temperature=0.1,\n", + " repetition_penalty=1.03,\n", + " server_kwargs={\n", + " \"headers\": {\n", + " \"Authorization\": f\"Bearer {HF_TOKEN}\",\n", + " \"Content-Type\": \"application/json\",\n", + " }\n", + " },\n", + ")" + ], + "metadata": { + "collapsed": false + }, + "id": "58e87b84bdc2e4d3" + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "from langchain.schema import (\n", + " HumanMessage,\n", + " SystemMessage\n", + ")\n", + "from langchain_community.chat_models.huggingface import ChatHuggingFace\n", + "\n", + "messages = [\n", + " #SystemMessage(content=\"You're a helpful assistant\"),\n", + " HumanMessage(\n", + " content=\"What happens when an unstoppable force meets an immovable object?\"\n", + " ),\n", + "]\n", + "\n", + "chat_model = ChatHuggingFace(llm=llm)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-02-19T12:56:44.407240200Z", + "start_time": "2024-02-19T12:56:42.802384100Z" + } + }, + "id": "411c80521b6f7ccb" + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false + }, + "id": "9f2e4ce2c28741e0" + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ { - "data": { - "text/plain": "' Here is the CSV data for the members of the editorial board or the advisory board of the journal \\'AUR - Agrar- und Umweltrecht\\':\\n\\n\"lastname\",\"firstname\",\"title\",\"affiliation\",\"role\"\\n\"Busse\",\"Christian\", \"Regierungsdirektor\", \"Bundesministerium für Ernährung und Landwirtschaft, Bonn\", \"Herausgeber\"\\n\"Endres\",\"Ewald\", \"Prof. 
Dr.\", \"Hochschule Weihenstephan-Triesdorf, Freising\", \"Redaktion\"\\n\"Francois\",\"Matthias\", \"Rechtsanwalt\", \"Bitburg\", \"Redaktion\"\\n\"Garmissen\",\"Bernd\", \"Rechtsanwalt\", \"Göttingen\", \"Redaktion\"\\n\"Graß\",\"Christiane\", \"Rechtsanwältin\", \"Bonn\", \"Redaktion\"\\n\"Haarstrich\",\"Jens\", \"Rechtsanwalt\", \"Peine\", \"Redaktion\"\\n\"Köpl\",\"Christian\", \"Ministerialrat\", \"Bayerisches Staatsministerium für Ernährung, Landwirtschaft und Forsten, München\", \"\"\\n\"Martinez\",\"Jose\", \"Prof. Dr.\", \"Institut für Landwirtschaftsrecht, Georg-August-Universität Göttingen, Göttingen\", \"Herausgeber\"\\n\"Nies\",\"Volkmar\", \"Ltd. Landwirtschaftsdirektor\", \"Landwirtschaftskammer NRW, Bonn\", \"Redaktion\"\\n\"Stephany\",\"Ralf\", \"Rechtsanwalt/Steuerberater\", \"Bonn\", \"Redaktion\"\\n\"Wedemeyer\",\"Harald\", \"Rechtsanwalt\", \"Landvolk Niedersachsen, Hannover\", \"Redaktion\"\\n\"Schell\",\"Irina Valeska\", \"\", \"Georg-August-Universität Göttingen, Göttingen\", \"\"\\n\\nNote: The column \\'role\\' is empty for some members, as their role could not be determined.'" - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " This is a classic philosophical question that has been asked for centuries, often used to illustrate a paradox. The idea of an \"unstoppable force\" implies something that cannot be stopped or slowed down, while an \"immovable object\" suggests something that cannot be moved. \n", + "\n", + "If we take these definitions literally, then when an unstoppable force meets an immovable object, it would result in a situation where neither can fulfill their inherent nature. This creates a paradox because the force cannot stop, but the object won't be moved. \n", + "\n", + "In reality, such a scenario is impossible as it defies the laws of physics. Forces and objects in the universe do not possess these absolute qualities. 
Instead, forces can typically be slowed, redirected, or absorbed, and objects can usually be moved, albeit sometimes with great difficulty.\n" + ] } ], "source": [ - "endpoint_url = \"https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud\"\n", - "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")" + "res = chat_model.invoke(messages)\n", + "print(res.content)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-01-17T19:14:56.752808800Z", - "start_time": "2024-01-17T19:13:45.800635700Z" + "end_time": "2024-02-19T12:56:58.327411200Z", + "start_time": "2024-02-19T12:56:51.135872200Z" } }, - "id": "b94cf62b996bf3a2" + "id": "2b2cbf175a71930b" } ], "metadata": { diff --git a/langchain-experiments/data/input/journal-website.txt b/langchain-experiments/data/input/journal-website.txt index 0011b27..25079b8 100644 --- a/langchain-experiments/data/input/journal-website.txt +++ b/langchain-experiments/data/input/journal-website.txt @@ -1,17 +1,17 @@ Georg-August-Universität Göttingen Institut für Landwirtschaftsrecht - Institut - Team - Forschungsprojekte - Lehre - Veranstaltungen - Göttinger Onlinebeiträge zum Agrarrecht - LexVinum - AUR - beck-blog - - Startseite AUR +Institut +Team +Forschungsprojekte +Lehre +Veranstaltungen +Göttinger Onlinebeiträge zum Agrarrecht +LexVinum +AUR +beck-blog + +Startseite AUR Suchen English AUR - Agrar- und Umweltrecht @@ -20,31 +20,31 @@ Die Zeitschrift Agrar- und Umweltrecht ist die führende Zeitschrift in Deutschl Das Institut für Landwirtschaftsrecht ist der Sitz der Schriftleitung Die Redaktion der Zeitschrift "Agrar-und Umweltrecht": - Regierungsdirektor Dr. Christian Busse, Bundesministerium für Ernährung und Landwirtschaft, Bonn (Agrarproduktrecht) +Regierungsdirektor Dr. Christian Busse, Bundesministerium für Ernährung und Landwirtschaft, Bonn (Agrarproduktrecht) - Prof. Dr. 
Ewald Endres, Hochschule Weihenstephan-Triesdorf, Freising (Forst, Jagd, Fischerei) +Prof. Dr. Ewald Endres, Hochschule Weihenstephan-Triesdorf, Freising (Forst, Jagd, Fischerei) - Rechtsanwalt Dr. Matthias Francois, Bitburg (Bodenrecht) +Rechtsanwalt Dr. Matthias Francois, Bitburg (Bodenrecht) - Rechtsanwalt Dr. Bernd von Garmissen, Göttingen (Erb- und Gesellschaftsrecht) +Rechtsanwalt Dr. Bernd von Garmissen, Göttingen (Erb- und Gesellschaftsrecht) - Rechtsanwalt Ingo Glas, Rostock (Ziviles Agrarwirtschaftsrecht) +Rechtsanwalt Ingo Glas, Rostock (Ziviles Agrarwirtschaftsrecht) - Rechtsanwältin Christiane Graß, Bonn (Agrarzivilrecht) +Rechtsanwältin Christiane Graß, Bonn (Agrarzivilrecht) - Rechtsanwalt Jens Haarstrich, Peine (Agrarzivilrecht) +Rechtsanwalt Jens Haarstrich, Peine (Agrarzivilrecht) - Ltd. Verwaltungsdirektor Dr. Erich Koch, Sozialversicherung für Landwirtschaft, Forsten und Gartenbau, Kassel (Agrarsozialrecht) +Ltd. Verwaltungsdirektor Dr. Erich Koch, Sozialversicherung für Landwirtschaft, Forsten und Gartenbau, Kassel (Agrarsozialrecht) - Ministerialrat Dr. Christian Köpl, Bayerisches Staatsministerium für Ernährung, Landwirtschaft und Forsten, München (Landwirtschaftliche Betriebsmittel) +Ministerialrat Dr. Christian Köpl, Bayerisches Staatsministerium für Ernährung, Landwirtschaft und Forsten, München (Landwirtschaftliche Betriebsmittel) - Prof. Dr. Jose Martinez, Institut für Landwirtschaftsrecht, Georg-August-Universitat Göttingen, Göttingen (Agrarförderrecht) +Prof. Dr. Jose Martinez, Institut für Landwirtschaftsrecht, Georg-August-Universitat Göttingen, Göttingen (Agrarförderrecht) - Ltd. Landwirtschaftsdirektor Volkmar Nies, Landwirtschaftskammer NRW, Bonn (Agrarumweltrecht) +Ltd. 
Landwirtschaftsdirektor Volkmar Nies, Landwirtschaftskammer NRW, Bonn (Agrarumweltrecht) - Rechtsanwalt/Steuerberater Ralf Stephany, Bonn (Agrarsteuerrecht) +Rechtsanwalt/Steuerberater Ralf Stephany, Bonn (Agrarsteuerrecht) - Rechtsanwalt Harald Wedemeyer, Landvolk Niedersachsen, Hannover (Öffentliches Agrarwirtschaftsrecht) +Rechtsanwalt Harald Wedemeyer, Landvolk Niedersachsen, Hannover (Öffentliches Agrarwirtschaftsrecht) Zusendung von Manuskripten: @@ -54,8 +54,8 @@ Alle Artikel durchlaufen vor der Publikation ein Begutachtungsverfahren (peer re AUR - Internet-Archiv der AUR - Autorenhinweise für Beiträge in der AUR +Internet-Archiv der AUR +Autorenhinweise für Beiträge in der AUR Schriftleitung Zeitschrift Agrar und Umweltrecht: @@ -80,21 +80,21 @@ Soziale Medien Online-Dienste - Studienangebot (eCampus) - Organisation (eCampus) - Prüfungsverwaltung (FlexNow) - Lernmanagement (Stud.IP) - Studierendenportal (eCampus) - Intranet - Stellenausschreibungen - Jobportal stellenwerk +Studienangebot (eCampus) +Organisation (eCampus) +Prüfungsverwaltung (FlexNow) +Lernmanagement (Stud.IP) +Studierendenportal (eCampus) +Intranet +Stellenausschreibungen +Jobportal stellenwerk Service - Barrierefreiheit - Datenschutz - Kontakt - Notfall - Lageplan - Impressum +Barrierefreiheit +Datenschutz +Kontakt +Notfall +Lageplan +Impressum diff --git a/langchain-experiments/readme.md b/langchain-experiments/readme.md index e7d1436..eb55c68 100644 --- a/langchain-experiments/readme.md +++ b/langchain-experiments/readme.md @@ -7,6 +7,9 @@ problems): `pip install pydantic -U` `pip install pydantic==1.10.11` `pip install python-dotenv langchain langchain-cli openai huggingface_hub langchain_openai` +`pip install text-generation transformers numexpr langchainhub sentencepiece jinja2` You need to copy `.env.dist` to `.env` and add values for the `OPENAI_API_KEY` and `HUGGINGFACEHUB_API_TOKEN` -emvironment variables. \ No newline at end of file +environment variables. + + -- GitLab