%% Cell type:markdown id:31c318c0-46dc-44b1-be82-4703028ca744 tags:
# Finetuning Gemma with transformers/torch/cuda/peft
See https://huggingface.co/blog/gemma-peft
%% Cell type:markdown id:6bea4986-2a5b-4c02-bfda-fda113a513b9 tags:
## Install dependencies
%% Cell type:code id:13bfb820-9615-48bd-96a9-5f454f1e67a9 tags:
``` python
# Core stack: transformers, datasets, peft (LoRA), trl (SFTTrainer), accelerate, and python-dotenv for the HF token
!pip install transformers datasets peft python-dotenv accelerate trl
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install --upgrade bottleneck
#!conda install -c pytorch pytorch -y
```
%% Cell type:markdown id:33bb741c-e55d-4403-bf35-0c303dfce389 tags:
## Show info on hardware
%% Cell type:code id:2588a200-6d74-476a-b0f3-a046bc2bb332 tags:
``` python
import os
import psutil
# RAM
ram_bytes = psutil.virtual_memory().total
ram_gb = ram_bytes / (1024**3)
print(f"Total RAM: {ram_gb:.2f} GB")
# CPU cores
cpu_cores = os.cpu_count()
print(f"Total CPU Cores: {cpu_cores}")
```
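%% Cell type:markdown id:3f7a2d10-5b1c-4e8a-9f2d-7c4b1a6e0d21 tags:
Since finetuning runs on CUDA, it is also useful to check which GPU(s) are visible and how much memory they have. This is a minimal sketch using `torch.cuda`; it assumes PyTorch with CUDA support is already installed.
%% Cell type:code id:8c2e4f90-1a3b-4d5c-8e7f-6a9b0c1d2e3f tags:
``` python
import torch
# GPU: list each visible CUDA device and its total memory
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f"GPU {i}: {props.name}, {props.total_memory / (1024**3):.2f} GB")
else:
    print("No CUDA device visible")
```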
%% Cell type:markdown id:09e29396-1c1e-44ec-9e74-54a67873e91a tags:
## Download and quantize model
%% Cell type:code id:6b625baf-d995-43b8-b8ce-f95ec624c73a tags:
``` python
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from dotenv import load_dotenv

# Read HF_TOKEN from env.txt into the environment
load_dotenv('env.txt')

model_id = "google/gemma-2b"

# 4-bit NF4 quantization with bfloat16 compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'], padding_side='right')
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0}, token=os.environ['HF_TOKEN'])
```
%% Output
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[2], line 13
6 model_id = "google/gemma-2b"
7 bnb_config = BitsAndBytesConfig(
8 load_in_4bit=True,
9 bnb_4bit_quant_type="nf4",
10 bnb_4bit_compute_dtype=torch.bfloat16
11 )
---> 13 tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'], padding_side='right')
14 model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0}, token=os.environ['HF_TOKEN'])
File /mpcdf/soft/SLE_15/packages/x86_64/anaconda/3/2023.03/lib/python3.10/os.py:680, in _Environ.__getitem__(self, key)
677 value = self._data[self.encodekey(key)]
678 except KeyError:
679 # raise KeyError with the original key value
--> 680 raise KeyError(key) from None
681 return self.decodevalue(value)
KeyError: 'HF_TOKEN'
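%% Cell type:markdown id:b4d6e8f0-2c3a-4b5d-9e1f-0a2b3c4d5e6f tags:
The `KeyError` above means that `env.txt` does not define `HF_TOKEN`. A sketch of one way to recover, assuming you have a Hugging Face access token that is allowed to download the gated `google/gemma-2b` weights: put a line `HF_TOKEN=...` into `env.txt`, or fall back to an interactive login if the variable is still missing.
%% Cell type:code id:5a7c9e1b-3d4f-4a6c-8b0d-2e4f6a8c0e1d tags:
``` python
import os
from dotenv import load_dotenv

# env.txt is expected to contain a line of the form:  HF_TOKEN=<your token>
load_dotenv('env.txt')
if 'HF_TOKEN' not in os.environ:
    # Fallback: prompt for the token interactively via huggingface_hub
    from huggingface_hub import notebook_login
    notebook_login()
```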
%% Cell type:code id:dd0a49a1-9aeb-47e4-b19a-9a9bb2e4bcad tags:
``` python
text = "Quote: Imagination is more"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
%% Cell type:code id:b31b170e-280a-4742-8ef4-73b45ee87927 tags:
``` python
from datasets import load_dataset
data = load_dataset("Abirate/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
```
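%% Cell type:markdown id:c1d2e3f4-a5b6-4c7d-8e9f-0a1b2c3d4e5f tags:
Optional sanity check (a small sketch): inspect one sample to confirm the fields that the formatting function below relies on (`quote`, `author`) and that tokenization added `input_ids`.
%% Cell type:code id:d2e3f4a5-b6c7-4d8e-9f0a-1b2c3d4e5f6a tags:
``` python
sample = data["train"][0]
# Print the raw fields used for formatting plus the tokenized length
print(sample["quote"], "-", sample["author"])
print("tokenized length:", len(sample["input_ids"]))
```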
%% Cell type:code id:1839f1e8-b0d8-4d45-859b-011d2bf1f146 tags:
``` python
import transformers
from trl import SFTTrainer
from peft import LoraConfig
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

def formatting_func(example):
    text = f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}"
    return [text]

trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    max_seq_length=1024,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
)
trainer.train()
```
%% Output
/u/cboul/.local/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:294: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.
warnings.warn(
---------------------------------------------------------------------------
OutOfMemoryError Traceback (most recent call last)
Cell In[4], line 33
13 return [text]
15 trainer = SFTTrainer(
16 model=model,
17 train_dataset=data["train"],
(...)
31 formatting_func=formatting_func,
32 )
---> 33 trainer.train()
File ~/.local/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:331, in SFTTrainer.train(self, *args, **kwargs)
328 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:
329 self.model = self._trl_activate_neftune(self.model)
--> 331 output = super().train(*args, **kwargs)
333 # After training we make sure to retrieve back the original forward pass method
334 # for the embedding layer by removing the forward post hook.
335 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:
File ~/.local/lib/python3.10/site-packages/transformers/trainer.py:1624, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1622 hf_hub_utils.enable_progress_bars()
1623 else:
-> 1624 return inner_training_loop(
1625 args=args,
1626 resume_from_checkpoint=resume_from_checkpoint,
1627 trial=trial,
1628 ignore_keys_for_eval=ignore_keys_for_eval,
1629 )
File ~/.local/lib/python3.10/site-packages/transformers/trainer.py:1961, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1958 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1960 with self.accelerator.accumulate(model):
-> 1961 tr_loss_step = self.training_step(model, inputs)
1963 if (
1964 args.logging_nan_inf_filter
1965 and not is_torch_tpu_available()
1966 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1967 ):
1968 # if loss is nan or inf simply add the average of previous logged losses
1969 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/.local/lib/python3.10/site-packages/transformers/trainer.py:2902, in Trainer.training_step(self, model, inputs)
2899 return loss_mb.reduce_mean().detach().to(self.args.device)
2901 with self.compute_loss_context_manager():
-> 2902 loss = self.compute_loss(model, inputs)
2904 if self.args.n_gpu > 1:
2905 loss = loss.mean() # mean() to average on multi-gpu parallel training
File ~/.local/lib/python3.10/site-packages/transformers/trainer.py:2925, in Trainer.compute_loss(self, model, inputs, return_outputs)
2923 else:
2924 labels = None
-> 2925 outputs = model(**inputs)
2926 # Save past state if it exists
2927 # TODO: this needs to be fixed and made cleaner later.
2928 if self.args.past_index >= 0:
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:817, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
816 def forward(*args, **kwargs):
--> 817 return model_forward(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/accelerate/utils/operations.py:805, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
804 def __call__(self, *args, **kwargs):
--> 805 return convert_to_fp32(self.model_forward(*args, **kwargs))
File ~/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
13 @functools.wraps(func)
14 def decorate_autocast(*args, **kwargs):
15 with autocast_instance:
---> 16 return func(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/peft/peft_model.py:1091, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
1089 if peft_config.peft_type == PeftType.POLY:
1090 kwargs["task_ids"] = task_ids
-> 1091 return self.base_model(
1092 input_ids=input_ids,
1093 attention_mask=attention_mask,
1094 inputs_embeds=inputs_embeds,
1095 labels=labels,
1096 output_attentions=output_attentions,
1097 output_hidden_states=output_hidden_states,
1098 return_dict=return_dict,
1099 **kwargs,
1100 )
1102 batch_size = _get_batch_size(input_ids, inputs_embeds)
1103 if attention_mask is not None:
1104 # concat prompt attention mask
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:160, in BaseTuner.forward(self, *args, **kwargs)
159 def forward(self, *args: Any, **kwargs: Any):
--> 160 return self.model.forward(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/accelerate/hooks.py:166, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
164 output = module._old_forward(*args, **kwargs)
165 else:
--> 166 output = module._old_forward(*args, **kwargs)
167 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:1088, in GemmaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)
1086 hidden_states = outputs[0]
1087 logits = self.lm_head(hidden_states)
-> 1088 logits = logits.float()
1089 loss = None
1090 if labels is not None:
1091 # Shift so that tokens < n predict n
OutOfMemoryError: CUDA out of memory. Tried to allocate 30.00 MiB. GPU 0 has a total capacity of 23.67 GiB of which 38.19 MiB is free. Process 36053 has 11.64 GiB memory in use. Process 31527 has 7.32 GiB memory in use. Including non-PyTorch memory, this process has 4.63 GiB memory in use. Of the allocated memory 4.34 GiB is allocated by PyTorch, and 9.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
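%% Cell type:markdown id:e3f4a5b6-c7d8-4e9f-a0b1-c2d3e4f5a6b7 tags:
The out-of-memory error above occurs while other processes already hold most of the GPU (roughly 19 GiB of the 23.67 GiB), so the training step cannot allocate even 30 MiB. Apart from getting an exclusive GPU, a few common mitigations can be tried; the sketch below is not a guaranteed fix. Note that `PYTORCH_CUDA_ALLOC_CONF` (suggested in the traceback) only takes effect if it is set before CUDA is first initialized, i.e. at the very top of the notebook or in the job script.
%% Cell type:code id:f4a5b6c7-d8e9-4f0a-b1c2-d3e4f5a6b7c8 tags:
``` python
import os
import torch

# Reduce fragmentation, as suggested in the traceback; must be set before the first CUDA allocation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Free cached blocks between runs
torch.cuda.empty_cache()

# Other levers to try (values are illustrative):
# - lower max_seq_length in SFTTrainer, e.g. 512 instead of 1024
# - pass gradient_checkpointing=True in TrainingArguments
```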
%% Cell type:code id:4c531e12-f314-47a4-bcde-f6a485c2891e tags:
``` python
```