From dc1f90d72a5563808f2381c474504b66c2b06ff8 Mon Sep 17 00:00:00 2001
From: Christian Boulanger <info@bibliograph.org>
Date: Wed, 17 Jan 2024 22:08:40 +0100
Subject: [PATCH] Updates

---
 langchain-experiments/compare-models.ipynb    | 41 +++++++++++--------
 .../lib/hf_llama2_70b_chat_gptq.py            | 35 ----------------
 ...3b_chat_gptq.py => hf_llama2_chat_gptq.py} | 10 ++---
 3 files changed, 29 insertions(+), 57 deletions(-)
 delete mode 100644 langchain-experiments/lib/hf_llama2_70b_chat_gptq.py
 rename langchain-experiments/lib/{hf_llama2_13b_chat_gptq.py => hf_llama2_chat_gptq.py} (66%)

diff --git a/langchain-experiments/compare-models.ipynb b/langchain-experiments/compare-models.ipynb
index 2901942..f9ebba8 100644
--- a/langchain-experiments/compare-models.ipynb
+++ b/langchain-experiments/compare-models.ipynb
@@ -5,10 +5,10 @@
   "source": [
    "# Comparing OpenAI and open LLMs\n",
    "\n",
-   "Using the text-only content of the website of the journal AUR - Agrar- und Umweltrecht, we compare the performance of\n",
-   "GPT-4, GPT-3.5-turbo and Models available on Huggingface.\n",
+   "Using the [text-only content of the website of the journal AUR - Agrar- und Umweltrecht](langchain-experiments/data/input/journal-website.txt), \n",
+   "we compare the performance of GPT-4, GPT-3.5-turbo, and models available on Hugging Face.\n",
    "\n",
-   "## Prpeparation\n",
+   "## Preparation\n",
    "\n",
    "Import dependencies, define shorthand functions, and prepare test data"
   ],
@@ -65,7 +65,15 @@
   "source": [
    "## Prompt\n",
    "\n",
-   "OpenAI's GPT-4 works perfectly with a German-language Prompt, and a very minimal prompt returns the data we need. In contrast, the open models performed miserably with that prompt. We therefore use English and provide very detailed instructions. "
+   "OpenAI's GPT-4 works perfectly with a minimal, German-language prompt and infers the meaning of the columns\n",
+   "to return the data we need:\n",
+   "\n",
+   "```\n",
+   "Finde im folgenden Text die Herausgeber, Redaktion/Schriftleitung und Beirat der Zeitschrift '{journal_name}' und gebe sie im CSV-Format zurück mit den Spalten 'lastname', 'firstname', 'title', 'position', 'affiliation', 'role'. Die Spalte 'role' enthält entweder 'Herausgeber', 'Redaktion', 'Beirat', 'Schriftleitung' oder ist leer wenn nicht bestimmbar. Wenn keine passenden Informationen verfügbar sind, gebe nur den CSV-Header zurück. Setze alle Werte in den CSV-Spalten in Anführungszeichen.\n",
+   "```\n",
+   "\n",
+   "\n",
+   "In contrast, the open models performed miserably with such a prompt. We therefore use English and provide very detailed instructions. "
   ],
   "metadata": {
    "collapsed": false
   }
@@ -74,7 +82,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 5,
   "outputs": [],
   "source": [
    "template = \"\"\"\n",
@@ -96,8 +104,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2024-01-17T19:43:17.827291400Z",
-    "start_time": "2024-01-17T19:43:17.811637900Z"
+    "end_time": "2024-01-17T20:46:02.624154500Z",
+    "start_time": "2024-01-17T20:46:02.598111600Z"
    }
   },
   "id": "23aef80911796078"
@@ -107,8 +115,7 @@
   "source": [
    "## ChatGPT-4 \n",
    "\n",
-   "GPT-4 delivers an almost perfect [result](data/output/editors-openai-gpt-4.csv). There are some problems left which could be \n",
-   "resolved by adding some more instructions to the prompt. \n",
+   "GPT-4 delivers an almost perfect [result](data/output/editors-openai-gpt-4.csv). Some problems remain that could be resolved by adding more instructions to the prompt. \n",
\n", "\n" ], "metadata": { @@ -190,7 +197,8 @@ { "cell_type": "markdown", "source": [ - "Now, let's try the open models via the Huggingface Inference Endpoint" + "Now, let's try the open models via the Huggingface Inference Endpoint. For this to work, you need to deploy\n", + "endpoints via https://ui.endpoints.huggingface.co/ and update the value of `enpoint_url` below." ], "metadata": { "collapsed": false @@ -223,10 +231,11 @@ } ], "source": [ - "from lib.hf_llama2_13b_chat_gptq import query\n", + "from lib.hf_llama2_chat_gptq import query\n", + "llama2_template = f\"<s>[INST] <<SYS>>You are a helpful assistant. No comments or explanation, just answer the question.<</SYS>>{template}[/INST]\"\n", "\n", - "lines = query(template, journal_name=journal_name, website_text=website_text).split(\"\\n\")[4:]\n", - "lines" + "endpoint_url = \"https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")\n" ], "metadata": { "collapsed": false, @@ -242,7 +251,7 @@ "source": [ "## TheBloke/Llama-2-70B-chat-GPTQ via Huggingface Inference Endpoint\n", "\n", - "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't the academic titles right. It also cannot be persuaded to [not comment on the CSV output].(data/output/editors-llama-2-70b-chat-gptq.txt)" + "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't the academic titles right. It also cannot be persuaded to [not comment on the CSV output].(data/output/editors-llama-2-70b-chat-gptq.txt). Given that the model costs $13/h to run, that's not really that impressive." ], "metadata": { "collapsed": false @@ -263,8 +272,8 @@ } ], "source": [ - "from lib.hf_llama2_70b_chat_gptq import query\n", - "query(template, journal_name=journal_name, website_text=website_text)" + "endpoint_url = \"https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")" ], "metadata": { "collapsed": false, diff --git a/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py b/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py deleted file mode 100644 index 33b3bdd..0000000 --- a/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py +++ /dev/null @@ -1,35 +0,0 @@ -import requests -import os -from dotenv import load_dotenv -from json import JSONDecodeError -import os - -load_dotenv() - -API_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN") -API_URL = "https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud" -headers = { - "Accept" : "application/json", - "Authorization": f"Bearer {API_KEY}", - "Content-Type": "application/json" -} - -def query(template, model_params = None, **params): - if model_params is None: - model_params = { - "temperature": 0.1, - "max_new_tokens": 1000 - } - prompt = template.format_map(params) - payload = { - "inputs": f"<s>[INST] <<SYS>>You are a helpful assistant. 
-        "parameters": model_params
-    }
-    response = requests.post(API_URL, headers=headers, json=payload)
-    response.raise_for_status()
-    try:
-        return response.json()[0].get("generated_text")
-    except JSONDecodeError:
-        with open('tmp/response.txt', "w", encoding='utf-8') as f:
-            f.write(response.text)
-        raise RuntimeError(f'Cannot parse response from {response.url}. See tmp/response.txt')
diff --git a/langchain-experiments/lib/hf_llama2_13b_chat_gptq.py b/langchain-experiments/lib/hf_llama2_chat_gptq.py
similarity index 66%
rename from langchain-experiments/lib/hf_llama2_13b_chat_gptq.py
rename to langchain-experiments/lib/hf_llama2_chat_gptq.py
index dfc4311..cdd1a6f 100644
--- a/langchain-experiments/lib/hf_llama2_13b_chat_gptq.py
+++ b/langchain-experiments/lib/hf_llama2_chat_gptq.py
@@ -1,5 +1,4 @@
 import requests
-import os
 from dotenv import load_dotenv
 from json import JSONDecodeError
 import os
@@ -7,25 +6,24 @@ import os
 load_dotenv()
 
 API_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-API_URL = "https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud"
 headers = {
     "Accept" : "application/json",
     "Authorization": f"Bearer {API_KEY}",
     "Content-Type": "application/json"
 }
 
-def query(template, model_params = None, **params):
+def query(url, template, model_params = None, **params):
     if model_params is None:
         model_params = {
             "temperature": 0.1,
             "max_new_tokens": 2000
         }
-    prompt = template.format_map(params)
+    inputs = template.format_map(params)
     payload = {
-        "inputs": f"<s>[INST] <<SYS>>You are a helpful assistant. No comments or explanation, just answer the question.<</SYS>>{prompt}[/INST]",
+        "inputs": inputs,
         "parameters": model_params
     }
-    response = requests.post(API_URL, headers=headers, json=payload)
+    response = requests.post(url, headers=headers, json=payload)
     response.raise_for_status()
     try:
         return response.json()[0].get("generated_text")
-- 
GitLab
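
For reference, a minimal sketch of how the refactored `query()` helper is called after this patch. It assumes a deployed Hugging Face Inference Endpoint (the URL below is the notebook's 13B endpoint and may no longer be live), a valid `HUGGINGFACEHUB_API_TOKEN` in `.env`, and a shortened stand-in for the notebook's full English prompt:

```python
from pathlib import Path

from lib.hf_llama2_chat_gptq import query

# Shortened, hypothetical stand-in for the notebook's detailed prompt template;
# the {journal_name} and {website_text} placeholders are filled later by query().
template = (
    "Find the editors of the journal '{journal_name}' in the following text "
    "and return them as CSV: {website_text}"
)

# After this patch, Llama-2 chat formatting happens in the caller, not the library:
llama2_template = (
    "<s>[INST] <<SYS>>You are a helpful assistant. No comments or "
    "explanation, just answer the question.<</SYS>>" + template + "[/INST]"
)

# The notebook's 13B endpoint; replace with your own deployment from
# https://ui.endpoints.huggingface.co/
endpoint_url = "https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud"

# query() fills the placeholders via str.format_map() and posts to the endpoint.
result = query(
    endpoint_url,
    llama2_template,
    journal_name="AUR - Agrar- und Umweltrecht",
    website_text=Path("data/input/journal-website.txt").read_text(encoding="utf-8"),
)
print(result)
```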