From 95b65ae71598f43d071d5f4dea8ba98bfcaeb81e Mon Sep 17 00:00:00 2001
From: Christian Boulanger <boulanger@lhlt.mpg.de>
Date: Tue, 5 Mar 2024 10:50:07 +0100
Subject: [PATCH] add gitignore

---
 cuda/.gitignore             |   1 +
 cuda/gemma-finetuning.ipynb | 660 +++++++++++++++++++++++++++++++++---
 2 files changed, 621 insertions(+), 40 deletions(-)
 create mode 100644 cuda/.gitignore

diff --git a/cuda/.gitignore b/cuda/.gitignore
new file mode 100644
index 0000000..2eea525
--- /dev/null
+++ b/cuda/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/cuda/gemma-finetuning.ipynb b/cuda/gemma-finetuning.ipynb
index deaf6cd..41e33f0 100644
--- a/cuda/gemma-finetuning.ipynb
+++ b/cuda/gemma-finetuning.ipynb
@@ -20,12 +20,605 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "13bfb820-9615-48bd-96a9-5f454f1e67a9",
    "metadata": {
-    "tags": []
+    "tags": [],
+    "ExecuteTime": {
+     "end_time": "2024-03-05T08:53:33.277048200Z",
+     "start_time": "2024-03-05T08:48:47.405777Z"
+    }
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[... pip dependency resolution and download progress-bar output truncated ...]\n",
+      "Successfully installed accelerate-0.27.2 datasets-2.18.0 dill-0.3.8 docstring-parser-0.15 mpmath-1.3.0 multiprocess-0.70.16 peft-0.9.0 pyarrow-15.0.0 pyarrow-hotfix-0.6 safetensors-0.4.2 shtab-1.7.0 sympy-1.12 tokenizers-0.15.2 torch-2.2.1 transformers-4.38.2 trl-0.7.11 tyro-0.7.3 xxhash-3.4.1\n",
+      "Successfully installed bitsandbytes-0.42.0 scipy-1.12.0\n",
+      "Successfully installed bottleneck-1.3.8\n"
+     ]
+    }
+   ],
   "source": [
    "!pip install transformers datasets peft python-dotenv accelerate trl\n",
    "!pip install -i https://pypi.org/simple/ bitsandbytes\n",
@@ -73,25 +666,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "6b625baf-d995-43b8-b8ce-f95ec624c73a",
    "metadata": {
     "tags": []
   },
-   "outputs": [
-    {
-     "ename": "KeyError",
-     "evalue": "'HF_TOKEN'",
-     "output_type": "error",
-     "traceback": [
-      "[... ANSI-colored KeyError traceback truncated: Cell In[2], line 13 -> AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'], padding_side='right') -> os.py:680, in _Environ.__getitem__ -> KeyError: 'HF_TOKEN' ...]"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
@@ -162,26 +742,26 @@
     "evalue": "CUDA out of memory. Tried to allocate 30.00 MiB. GPU 0 has a total capacity of 23.67 GiB of which 38.19 MiB is free. Process 36053 has 11.64 GiB memory in use. Process 31527 has 7.32 GiB memory in use. Including non-PyTorch memory, this process has 4.63 GiB memory in use. Of the allocated memory 4.34 GiB is allocated by PyTorch, and 9.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
     "output_type": "error",
     "traceback": [
layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:1624\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1622\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1623\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1625\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1626\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1627\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1628\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1629\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:1961\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1958\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_begin(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[1;32m 1960\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[0;32m-> 1961\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1963\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1964\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1965\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1966\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1967\u001b[0m ):\n\u001b[1;32m 1968\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1969\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss 
\u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:2902\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2899\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 2901\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 2902\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2904\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mn_gpu \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2905\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean() \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:2925\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 2923\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2924\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 2925\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2926\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 2927\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 2928\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1518\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1519\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1523\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:817\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 816\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 817\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:805\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in 
\u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/peft/peft_model.py:1091\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m peft_config\u001b[38;5;241m.\u001b[39mpeft_type \u001b[38;5;241m==\u001b[39m PeftType\u001b[38;5;241m.\u001b[39mPOLY:\n\u001b[1;32m 1090\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m task_ids\n\u001b[0;32m-> 1091\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1102\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m 1103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1104\u001b[0m \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in 
\u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1518\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1519\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1523\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:160\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in 
\u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:1088\u001b[0m, in \u001b[0;36mGemmaForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[1;32m 1086\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1087\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_head(hidden_states)\n\u001b[0;32m-> 1088\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[43mlogits\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1089\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1091\u001b[0m \u001b[38;5;66;03m# Shift so that tokens < n predict n\u001b[39;00m\n", - "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 30.00 MiB. GPU 0 has a total capacity of 23.67 GiB of which 38.19 MiB is free. Process 36053 has 11.64 GiB memory in use. Process 31527 has 7.32 GiB memory in use. Including non-PyTorch memory, this process has 4.63 GiB memory in use. Of the allocated memory 4.34 GiB is allocated by PyTorch, and 9.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mOutOfMemoryError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[4], line 33\u001B[0m\n\u001B[1;32m 13\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m [text]\n\u001B[1;32m 15\u001B[0m trainer \u001B[38;5;241m=\u001B[39m SFTTrainer(\n\u001B[1;32m 16\u001B[0m model\u001B[38;5;241m=\u001B[39mmodel,\n\u001B[1;32m 17\u001B[0m train_dataset\u001B[38;5;241m=\u001B[39mdata[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtrain\u001B[39m\u001B[38;5;124m\"\u001B[39m],\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 31\u001B[0m formatting_func\u001B[38;5;241m=\u001B[39mformatting_func,\n\u001B[1;32m 32\u001B[0m )\n\u001B[0;32m---> 33\u001B[0m \u001B[43mtrainer\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtrain\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:331\u001B[0m, in \u001B[0;36mSFTTrainer.train\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 328\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mneftune_noise_alpha \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_trainer_supports_neftune:\n\u001B[1;32m 329\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_trl_activate_neftune(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel)\n\u001B[0;32m--> 331\u001B[0m output \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtrain\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 333\u001B[0m \u001B[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001B[39;00m\n\u001B[1;32m 334\u001B[0m \u001B[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001B[39;00m\n\u001B[1;32m 335\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mneftune_noise_alpha \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_trainer_supports_neftune:\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:1624\u001B[0m, in \u001B[0;36mTrainer.train\u001B[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001B[0m\n\u001B[1;32m 1622\u001B[0m hf_hub_utils\u001B[38;5;241m.\u001B[39menable_progress_bars()\n\u001B[1;32m 1623\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1624\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43minner_training_loop\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1625\u001B[0m \u001B[43m 
\u001B[49m\u001B[43margs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1626\u001B[0m \u001B[43m \u001B[49m\u001B[43mresume_from_checkpoint\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mresume_from_checkpoint\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1627\u001B[0m \u001B[43m \u001B[49m\u001B[43mtrial\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtrial\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1628\u001B[0m \u001B[43m \u001B[49m\u001B[43mignore_keys_for_eval\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mignore_keys_for_eval\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1629\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:1961\u001B[0m, in \u001B[0;36mTrainer._inner_training_loop\u001B[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001B[0m\n\u001B[1;32m 1958\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcontrol \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcallback_handler\u001B[38;5;241m.\u001B[39mon_step_begin(args, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstate, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcontrol)\n\u001B[1;32m 1960\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39maccelerator\u001B[38;5;241m.\u001B[39maccumulate(model):\n\u001B[0;32m-> 1961\u001B[0m tr_loss_step \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtraining_step\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43minputs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1963\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m (\n\u001B[1;32m 1964\u001B[0m args\u001B[38;5;241m.\u001B[39mlogging_nan_inf_filter\n\u001B[1;32m 1965\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m is_torch_tpu_available()\n\u001B[1;32m 1966\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m (torch\u001B[38;5;241m.\u001B[39misnan(tr_loss_step) \u001B[38;5;129;01mor\u001B[39;00m torch\u001B[38;5;241m.\u001B[39misinf(tr_loss_step))\n\u001B[1;32m 1967\u001B[0m ):\n\u001B[1;32m 1968\u001B[0m \u001B[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001B[39;00m\n\u001B[1;32m 1969\u001B[0m tr_loss \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m tr_loss \u001B[38;5;241m/\u001B[39m (\u001B[38;5;241m1\u001B[39m \u001B[38;5;241m+\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mstate\u001B[38;5;241m.\u001B[39mglobal_step \u001B[38;5;241m-\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_globalstep_last_logged)\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:2902\u001B[0m, in \u001B[0;36mTrainer.training_step\u001B[0;34m(self, model, inputs)\u001B[0m\n\u001B[1;32m 2899\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m loss_mb\u001B[38;5;241m.\u001B[39mreduce_mean()\u001B[38;5;241m.\u001B[39mdetach()\u001B[38;5;241m.\u001B[39mto(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39margs\u001B[38;5;241m.\u001B[39mdevice)\n\u001B[1;32m 2901\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcompute_loss_context_manager():\n\u001B[0;32m-> 2902\u001B[0m loss \u001B[38;5;241m=\u001B[39m 
\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcompute_loss\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43minputs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2904\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39margs\u001B[38;5;241m.\u001B[39mn_gpu \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 2905\u001B[0m loss \u001B[38;5;241m=\u001B[39m loss\u001B[38;5;241m.\u001B[39mmean() \u001B[38;5;66;03m# mean() to average on multi-gpu parallel training\u001B[39;00m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/transformers/trainer.py:2925\u001B[0m, in \u001B[0;36mTrainer.compute_loss\u001B[0;34m(self, model, inputs, return_outputs)\u001B[0m\n\u001B[1;32m 2923\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 2924\u001B[0m labels \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m-> 2925\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[43mmodel\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43minputs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2926\u001B[0m \u001B[38;5;66;03m# Save past state if it exists\u001B[39;00m\n\u001B[1;32m 2927\u001B[0m \u001B[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001B[39;00m\n\u001B[1;32m 2928\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39margs\u001B[38;5;241m.\u001B[39mpast_index \u001B[38;5;241m>\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m:\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1509\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[1;32m 1510\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1511\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1515\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[1;32m 1516\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[1;32m 1517\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[1;32m 1518\u001B[0m 
\u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[1;32m 1519\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[0;32m-> 1520\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mforward_call\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1522\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 1523\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:817\u001B[0m, in \u001B[0;36mconvert_outputs_to_fp32.<locals>.forward\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 816\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mforward\u001B[39m(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m--> 817\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mmodel_forward\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:805\u001B[0m, in \u001B[0;36mConvertOutputsToFp32.__call__\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 804\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__call__\u001B[39m(\u001B[38;5;28mself\u001B[39m, \u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m--> 805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m convert_to_fp32(\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel_forward\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m)\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001B[0m, in \u001B[0;36mautocast_decorator.<locals>.decorate_autocast\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 13\u001B[0m \u001B[38;5;129m@functools\u001B[39m\u001B[38;5;241m.\u001B[39mwraps(func)\n\u001B[1;32m 14\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mdecorate_autocast\u001B[39m(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[1;32m 15\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m autocast_instance:\n\u001B[0;32m---> 16\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/peft/peft_model.py:1091\u001B[0m, in \u001B[0;36mPeftModelForCausalLM.forward\u001B[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001B[0m\n\u001B[1;32m 
1089\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m peft_config\u001B[38;5;241m.\u001B[39mpeft_type \u001B[38;5;241m==\u001B[39m PeftType\u001B[38;5;241m.\u001B[39mPOLY:\n\u001B[1;32m 1090\u001B[0m kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtask_ids\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m task_ids\n\u001B[0;32m-> 1091\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbase_model\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 1092\u001B[0m \u001B[43m \u001B[49m\u001B[43minput_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minput_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1093\u001B[0m \u001B[43m \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mattention_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1094\u001B[0m \u001B[43m \u001B[49m\u001B[43minputs_embeds\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs_embeds\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1095\u001B[0m \u001B[43m \u001B[49m\u001B[43mlabels\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlabels\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1096\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1097\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_hidden_states\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_hidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1098\u001B[0m \u001B[43m \u001B[49m\u001B[43mreturn_dict\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mreturn_dict\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1099\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 1100\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1102\u001B[0m batch_size \u001B[38;5;241m=\u001B[39m _get_batch_size(input_ids, inputs_embeds)\n\u001B[1;32m 1103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m attention_mask \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 1104\u001B[0m \u001B[38;5;66;03m# concat prompt attention mask\u001B[39;00m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1509\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[1;32m 1510\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1511\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1515\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of 
the logic in\u001B[39;00m\n\u001B[1;32m 1516\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[1;32m 1517\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[1;32m 1518\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[1;32m 1519\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[0;32m-> 1520\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mforward_call\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1522\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 1523\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:160\u001B[0m, in \u001B[0;36mBaseTuner.forward\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 159\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mforward\u001B[39m(\u001B[38;5;28mself\u001B[39m, \u001B[38;5;241m*\u001B[39margs: Any, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Any):\n\u001B[0;32m--> 160\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mforward\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/accelerate/hooks.py:166\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[0;34m(module, *args, **kwargs)\u001B[0m\n\u001B[1;32m 164\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[1;32m 165\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m--> 166\u001B[0m output \u001B[38;5;241m=\u001B[39m \u001B[43mmodule\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_old_forward\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 167\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n", + "File \u001B[0;32m~/.local/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:1088\u001B[0m, in \u001B[0;36mGemmaForCausalLM.forward\u001B[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, 
labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001B[0m\n\u001B[1;32m 1086\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[1;32m 1087\u001B[0m logits \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlm_head(hidden_states)\n\u001B[0;32m-> 1088\u001B[0m logits \u001B[38;5;241m=\u001B[39m \u001B[43mlogits\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfloat\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1089\u001B[0m loss \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m 1090\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m labels \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 1091\u001B[0m \u001B[38;5;66;03m# Shift so that tokens < n predict n\u001B[39;00m\n", + "\u001B[0;31mOutOfMemoryError\u001B[0m: CUDA out of memory. Tried to allocate 30.00 MiB. GPU 0 has a total capacity of 23.67 GiB of which 38.19 MiB is free. Process 36053 has 11.64 GiB memory in use. Process 31527 has 7.32 GiB memory in use. Including non-PyTorch memory, this process has 4.63 GiB memory in use. Of the allocated memory 4.34 GiB is allocated by PyTorch, and 9.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] } ], @@ -232,9 +812,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Anaconda 2023.03", + "name": "python3", "language": "python", - "name": "anaconda_3_2023_03" + "display_name": "Python 3 (ipykernel)" }, "language_info": { "codemirror_mode": { -- GitLab
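
Note on the first hunk above: the output being removed records a `KeyError: 'HF_TOKEN'` raised by `os.environ['HF_TOKEN']` in the model-loading cell. A minimal sketch of a more forgiving setup, assuming the token lives in a local `.env` file (python-dotenv is installed by the first cell); the file layout and error message here are illustrative, not taken from the notebook:

```python
import os

from dotenv import load_dotenv

# Read HF_TOKEN from a .env file in the working directory, if one exists,
# instead of assuming the variable is already exported in the shell.
load_dotenv()

hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise RuntimeError(
        "HF_TOKEN is not set; add it to .env or export it before "
        "downloading gated models such as google/gemma-2b."
    )
```

With `hf_token` in hand, it can be passed as `token=hf_token` to the `AutoTokenizer.from_pretrained` and `AutoModelForCausalLM.from_pretrained` calls shown in the traceback, which turns the hard crash into an early, readable failure.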
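Note on the second hunk: it replaces one `OutOfMemoryError` traceback with a re-encoded copy of the same failure — `trainer.train()` dies trying to allocate 30 MiB because two other processes already hold roughly 19 GiB of the 24 GiB card. A quick way to confirm what is actually free before launching training (standard PyTorch API; the device index assumes the single-GPU `device_map={"": 0}` used in the loading cell):

```python
import torch

# torch.cuda.mem_get_info returns (free_bytes, total_bytes) for the device.
free, total = torch.cuda.mem_get_info(0)
print(f"GPU 0: {free / 2**30:.2f} GiB free of {total / 2**30:.2f} GiB")
```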
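If the card cannot be freed of the other processes, the usual mitigations are to follow the allocator hint printed in the error message itself and to shrink the per-step footprint. A sketch under those assumptions; the batch-size and accumulation numbers are illustrative defaults, not tuned values from this notebook:

```python
import os

# Allocator hint quoted verbatim in the OOM message; it must be set before
# the first CUDA allocation in the process.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=1,   # smallest possible micro-batch
    gradient_accumulation_steps=8,   # keeps the effective batch size at 8
    gradient_checkpointing=True,     # trades recompute for activation memory
    bf16=True,                       # matches the bnb_4bit_compute_dtype above
)
```

Passing `args=training_args` to the `SFTTrainer` call visible in the traceback leaves the 4-bit quantized PEFT setup unchanged while cutting peak activation memory at the cost of some recompute.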