From dc1f90d72a5563808f2381c474504b66c2b06ff8 Mon Sep 17 00:00:00 2001
From: Christian Boulanger <info@bibliograph.org>
Date: Wed, 17 Jan 2024 22:08:40 +0100
Subject: [PATCH] Updates

---
 langchain-experiments/compare-models.ipynb    | 41 +++++++++++--------
 .../lib/hf_llama2_70b_chat_gptq.py            | 35 ----------------
 ...3b_chat_gptq.py => hf_llama2_chat_gptq.py} | 10 ++---
 3 files changed, 29 insertions(+), 57 deletions(-)
 delete mode 100644 langchain-experiments/lib/hf_llama2_70b_chat_gptq.py
 rename langchain-experiments/lib/{hf_llama2_13b_chat_gptq.py => hf_llama2_chat_gptq.py} (66%)

diff --git a/langchain-experiments/compare-models.ipynb b/langchain-experiments/compare-models.ipynb
index 2901942..f9ebba8 100644
--- a/langchain-experiments/compare-models.ipynb
+++ b/langchain-experiments/compare-models.ipynb
@@ -5,10 +5,10 @@
   "source": [
    "# Comparing OpenAI and open LLMs\n",
    "\n",
-   "Using the text-only content of the website of the journal AUR - Agrar- und Umweltrecht, we compare the performance of\n",
-   "GPT-4, GPT-3.5-turbo and Models available on Huggingface.\n",
+   "Using the [text-only content of the website of the journal AUR - Agrar- und Umweltrecht](langchain-experiments/data/input/journal-website.txt), \n",
+   "we compare the performance of GPT-4, GPT-3.5-turbo, and models available on Hugging Face.\n",
    "\n",
-   "## Prpeparation\n",
+   "## Preparation\n",
    "\n",
    "Import dependencies, define shorthand functions, and prepare test data"
   ],
@@ -65,7 +65,15 @@
   "source": [
    "## Prompt\n",
    "\n",
-   "OpenAI's GPT-4 works perfectly with a German-language Prompt, and a very minimal prompt returns the data we need. In contrast, the open models performed miserably with that prompt. We therefore use English and provide very detailed instructions. "
+   "OpenAI's GPT-4 works perfectly with a minimal, German-language prompt and infers the meaning of the columns\n",
+   "to return the data we need:\n",
+   "\n",
+   "```\n",
+   "Finde im folgenden Text die Herausgeber, Redaktion/Schriftleitung und Beirat der Zeitschrift '{journal_name}' und gebe sie im CSV-Format zurück mit den Spalten 'lastname', 'firstname', 'title', 'position', 'affiliation', 'role'. Die Spalte 'role' enthält entweder 'Herausgeber', 'Redaktion', 'Beirat', 'Schriftleitung' oder ist leer wenn nicht bestimmbar. Wenn keine passenden Informationen verfügbar sind, gebe nur den CSV-Header zurück. Setze alle Werte in den CSV-Spalten in Anführungszeichen.\n",
+   "```\n",
+   "\n",
+   "\n",
+   "In contrast, the open models performed miserably with such a prompt. We therefore use English and provide very detailed instructions. "
   ],
   "metadata": {
    "collapsed": false
   }
@@ -74,7 +82,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 5,
   "outputs": [],
   "source": [
    "template = \"\"\"\n",
@@ -96,8 +104,8 @@
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
-    "end_time": "2024-01-17T19:43:17.827291400Z",
-    "start_time": "2024-01-17T19:43:17.811637900Z"
+    "end_time": "2024-01-17T20:46:02.624154500Z",
+    "start_time": "2024-01-17T20:46:02.598111600Z"
    }
   },
   "id": "23aef80911796078"
@@ -107,8 +115,7 @@
   "source": [
    "## ChatGPT-4 \n",
    "\n",
-   "GPT-4 delivers an almost perfect [result](data/output/editors-openai-gpt-4.csv). There are some problems left which could be \n",
-   "resolved by adding some more instructions to the prompt. \n",
+   "GPT-4 delivers an almost perfect [result](data/output/editors-openai-gpt-4.csv). Some problems remain that could be resolved by adding more instructions to the prompt. \n",
\n", "\n" ], "metadata": { @@ -190,7 +197,8 @@ { "cell_type": "markdown", "source": [ - "Now, let's try the open models via the Huggingface Inference Endpoint" + "Now, let's try the open models via the Huggingface Inference Endpoint. For this to work, you need to deploy\n", + "endpoints via https://ui.endpoints.huggingface.co/ and update the value of `enpoint_url` below." ], "metadata": { "collapsed": false @@ -223,10 +231,11 @@ } ], "source": [ - "from lib.hf_llama2_13b_chat_gptq import query\n", + "from lib.hf_llama2_chat_gptq import query\n", + "llama2_template = f\"<s>[INST] <<SYS>>You are a helpful assistant. No comments or explanation, just answer the question.<</SYS>>{template}[/INST]\"\n", "\n", - "lines = query(template, journal_name=journal_name, website_text=website_text).split(\"\\n\")[4:]\n", - "lines" + "endpoint_url = \"https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")\n" ], "metadata": { "collapsed": false, @@ -242,7 +251,7 @@ "source": [ "## TheBloke/Llama-2-70B-chat-GPTQ via Huggingface Inference Endpoint\n", "\n", - "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't the academic titles right. It also cannot be persuaded to [not comment on the CSV output].(data/output/editors-llama-2-70b-chat-gptq.txt)" + "The 70 billion parameter variant [does a bit better](data/output/editors-llama-2-70b-chat-gptq.csv) but, among other things, doesn't the academic titles right. It also cannot be persuaded to [not comment on the CSV output].(data/output/editors-llama-2-70b-chat-gptq.txt). Given that the model costs $13/h to run, that's not really that impressive." ], "metadata": { "collapsed": false @@ -263,8 +272,8 @@ } ], "source": [ - "from lib.hf_llama2_70b_chat_gptq import query\n", - "query(template, journal_name=journal_name, website_text=website_text)" + "endpoint_url = \"https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud\"\n", + "query(endpoint_url, template, journal_name=journal_name, website_text=website_text).split(\"\\n\")" ], "metadata": { "collapsed": false, diff --git a/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py b/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py deleted file mode 100644 index 33b3bdd..0000000 --- a/langchain-experiments/lib/hf_llama2_70b_chat_gptq.py +++ /dev/null @@ -1,35 +0,0 @@ -import requests -import os -from dotenv import load_dotenv -from json import JSONDecodeError -import os - -load_dotenv() - -API_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN") -API_URL = "https://gp8iviqlqee101a0.us-east-1.aws.endpoints.huggingface.cloud" -headers = { - "Accept" : "application/json", - "Authorization": f"Bearer {API_KEY}", - "Content-Type": "application/json" -} - -def query(template, model_params = None, **params): - if model_params is None: - model_params = { - "temperature": 0.1, - "max_new_tokens": 1000 - } - prompt = template.format_map(params) - payload = { - "inputs": f"<s>[INST] <<SYS>>You are a helpful assistant. 
-        "parameters": model_params
-    }
-    response = requests.post(API_URL, headers=headers, json=payload)
-    response.raise_for_status()
-    try:
-        return response.json()[0].get("generated_text")
-    except JSONDecodeError:
-        with open('tmp/response.txt', "w", encoding='utf-8') as f:
-            f.write(response.text)
-        raise RuntimeError(f'Cannot parse response from {response.url}. See tmp/response.txt')
diff --git a/langchain-experiments/lib/hf_llama2_13b_chat_gptq.py b/langchain-experiments/lib/hf_llama2_chat_gptq.py
similarity index 66%
rename from langchain-experiments/lib/hf_llama2_13b_chat_gptq.py
rename to langchain-experiments/lib/hf_llama2_chat_gptq.py
index dfc4311..cdd1a6f 100644
--- a/langchain-experiments/lib/hf_llama2_13b_chat_gptq.py
+++ b/langchain-experiments/lib/hf_llama2_chat_gptq.py
@@ -1,5 +1,4 @@
 import requests
-import os
 from dotenv import load_dotenv
 from json import JSONDecodeError
 import os
@@ -7,25 +6,24 @@ import os
 load_dotenv()
 
 API_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-API_URL = "https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud"
 headers = {
     "Accept" : "application/json",
     "Authorization": f"Bearer {API_KEY}",
     "Content-Type": "application/json"
 }
 
-def query(template, model_params = None, **params):
+def query(url, template, model_params = None, **params):
     if model_params is None:
         model_params = {
             "temperature": 0.1,
             "max_new_tokens": 2000
         }
-    prompt = template.format_map(params)
+    inputs = template.format_map(params)
     payload = {
-        "inputs": f"<s>[INST] <<SYS>>You are a helpful assistant. No comments or explanation, just answer the question.<</SYS>>{prompt}[/INST]",
+        "inputs": inputs,
         "parameters": model_params
     }
-    response = requests.post(API_URL, headers=headers, json=payload)
+    response = requests.post(url, headers=headers, json=payload)
     response.raise_for_status()
     try:
         return response.json()[0].get("generated_text")
-- 
GitLab
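
For reference, a minimal sketch of how the refactored `query()` helper is called after this patch. It assumes a deployed Hugging Face Inference Endpoint (the URL below is the notebook's 13B endpoint and may no longer be live), a valid `HUGGINGFACEHUB_API_TOKEN` in `.env`, and a shortened stand-in for the notebook's full English prompt:

```python
from pathlib import Path

from lib.hf_llama2_chat_gptq import query

# Shortened, hypothetical stand-in for the notebook's detailed prompt template;
# the {journal_name} and {website_text} placeholders are filled later by query().
template = (
    "Find the editors of the journal '{journal_name}' in the following text "
    "and return them as CSV: {website_text}"
)

# After this patch, Llama-2 chat formatting happens in the caller, not the library:
llama2_template = (
    "<s>[INST] <<SYS>>You are a helpful assistant. No comments or "
    "explanation, just answer the question.<</SYS>>" + template + "[/INST]"
)

# The notebook's 13B endpoint; replace with your own deployment from
# https://ui.endpoints.huggingface.co/
endpoint_url = "https://z8afrqamxvaaitmf.us-east-1.aws.endpoints.huggingface.cloud"

# query() fills the placeholders via str.format_map() and posts to the endpoint.
result = query(
    endpoint_url,
    llama2_template,
    journal_name="AUR - Agrar- und Umweltrecht",
    website_text=Path("data/input/journal-website.txt").read_text(encoding="utf-8"),
)
print(result)
```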