
Commit 3d8f66a

Merge pull request #126 from RohitDhankar/dev_torn
local --cuda out of memory == 3869MiB / 3910MiB
2 parents d601a65 + 8aba3ce commit 3d8f66a

7 files changed: +179 -0 lines changed

src/LLAMA_v2/log_files/init_installs_log_1.log (+2)

@@ -2,6 +2,8 @@
 
 conda activate env_hface
 
+https://huggingface.co/abhishek/llama-2-7b-hf-small-shards
+
 
 https://github.com/huggingface/autotrain-advanced
 /home/dhankar/temp/08_23/a___main_tutes_DL/a__LLM/Abhi_Thakur/orig_auto_train/autotrain-advanced
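The two added lines record the sharded Llama-2 checkpoint used by the runs below. As a quick smoke test before attempting a full model load (a minimal sketch, assuming the env_hface environment above is active and can reach the Hugging Face Hub), the tokenizer for that checkpoint can be pulled on its own:

from transformers import AutoTokenizer

# downloads only the tokenizer files for the sharded Llama-2-7B checkpoint noted above
tok = AutoTokenizer.from_pretrained("abhishek/llama-2-7b-hf-small-shards")
print(type(tok).__name__, "vocab size:", tok.vocab_size)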
@@ -0,0 +1,10 @@
+> INFO Installing latest transformers@main
+> INFO Successfully installed latest transformers
+> INFO Installing latest peft@main
+> INFO Successfully installed latest peft
+> INFO Installing latest diffusers@main
+> INFO Successfully installed latest diffusers
+> INFO Installing latest trl@main
+> INFO Successfully installed latest trl
+> INFO Installing latest xformers
+> INFO Successfully installed latest xformers
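The setup log only reports that each install succeeded. A minimal check (same env_hface environment assumed) to see which versions the autotrain setup step actually left behind:

import transformers, peft, diffusers, trl

# print the resolved version of each package the setup log above reports installing
for mod in (transformers, peft, diffusers, trl):
    print(f"{mod.__name__}: {mod.__version__}")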
@@ -0,0 +1,48 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train', valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002, num_train_epochs=3, train_batch_size=3, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024, repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, use_flash_attention_2=False, func=<function run_llm_command_factory at 0x7f2efba84c20>)
+> INFO loading dataset from csv
+Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
+Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]
+Loading checkpoint shards:  10%|█         | 1/10 [00:28<04:15, 28.35s/it]
+Loading checkpoint shards:  20%|██        | 2/10 [00:54<03:36, 27.11s/it]
+Loading checkpoint shards:  30%|███       | 3/10 [01:21<03:07, 26.85s/it]
+Loading checkpoint shards:  40%|████      | 4/10 [01:47<02:39, 26.63s/it]
+Loading checkpoint shards:  50%|█████     | 5/10 [02:13<02:11, 26.37s/it]
+Loading checkpoint shards:  60%|██████    | 6/10 [02:39<01:45, 26.37s/it]
+Loading checkpoint shards:  70%|███████   | 7/10 [03:20<01:33, 31.24s/it]
+Loading checkpoint shards:  70%|███████   | 7/10 [03:54<01:40, 33.44s/it]
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 565, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3303, in from_pretrained
+    ) = cls._load_pretrained_model(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3691, in _load_pretrained_model
+    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
+                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 749, in _load_state_dict_into_meta_model
+    set_module_quantized_tensor_to_device(
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/integrations/bitsandbytes.py", line 98, in set_module_quantized_tensor_to_device
+    new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
+                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 179, in to
+    return self.cuda(device)
+           ^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 157, in cuda
+    w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
+                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/functional.py", line 816, in quantize_4bit
+    out = torch.zeros(((n+1)//2, 1), dtype=torch.uint8, device=A.device)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 22.00 MiB (GPU 0; 3.82 GiB total capacity; 2.98 GiB already allocated; 20.50 MiB free; 3.05 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
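This first run (train_batch_size=3) never reaches training: it dies while bitsandbytes is quantizing checkpoint shard 8 of 10, with only ~20 MiB of the 3.82 GiB card still free. The error text itself points at PYTORCH_CUDA_ALLOC_CONF. Below is a minimal sketch of the kind of 4-bit load autotrain is doing here, with the allocator hint set and device_map='auto' so accelerate can spill layers to CPU RAM; the max_split_size_mb value is an untested assumption, and whether 4-bit weights can actually be offloaded depends on the bitsandbytes/transformers versions in use.

import os

# must be set before the first CUDA allocation in the process; 64 is only an example value
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:64")

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                    # mirrors use_int4=True in the Params above
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,       # shaves a little more off the quantization state
)

model = AutoModelForCausalLM.from_pretrained(
    "abhishek/llama-2-7b-hf-small-shards",
    quantization_config=bnb_config,
    device_map="auto",                    # let accelerate place layers across GPU/CPU
    low_cpu_mem_usage=True,
)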
@@ -0,0 +1,45 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train', valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002, num_train_epochs=3, train_batch_size=2, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024, repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, use_flash_attention_2=False, func=<function run_llm_command_factory at 0x7ff120314c20>)
+> INFO loading dataset from csv
+Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
+Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]
+Loading checkpoint shards:  10%|█         | 1/10 [00:26<04:01, 26.81s/it]
+Loading checkpoint shards:  20%|██        | 2/10 [00:51<03:26, 25.78s/it]
+Loading checkpoint shards:  30%|███       | 3/10 [01:11<02:40, 22.94s/it]
+Loading checkpoint shards:  40%|████      | 4/10 [01:31<02:11, 21.94s/it]
+Loading checkpoint shards:  50%|█████     | 5/10 [01:47<01:38, 19.79s/it]
+Loading checkpoint shards:  60%|██████    | 6/10 [02:03<01:13, 18.43s/it]
+Loading checkpoint shards:  70%|███████   | 7/10 [02:19<00:53, 17.76s/it]
+Loading checkpoint shards:  70%|███████   | 7/10 [02:33<01:05, 21.93s/it]
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 565, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3303, in from_pretrained
+    ) = cls._load_pretrained_model(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3691, in _load_pretrained_model
+    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
+                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 749, in _load_state_dict_into_meta_model
+    set_module_quantized_tensor_to_device(
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/integrations/bitsandbytes.py", line 98, in set_module_quantized_tensor_to_device
+    new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
+                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 179, in to
+    return self.cuda(device)
+           ^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 156, in cuda
+    w = self.data.contiguous().half().cuda(device)
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 3.82 GiB total capacity; 2.95 GiB already allocated; 40.75 MiB free; 3.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
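Dropping train_batch_size from 3 to 2 changes nothing, because both runs fail while the 4-bit weights are still being moved onto the GPU, before a single batch exists. A rough back-of-the-envelope estimate (approximate per-parameter costs, not measured) shows why a ~7B-parameter model barely fits a 3.82 GiB card even at 4 bits:

params = 7e9                                   # ~7B parameters in Llama-2-7B
weights_gib = params * 0.5 / 2**30             # 4-bit (NF4) weights: ~3.26 GiB
quant_state_gib = params / 64 * 2 / 2**30      # rough absmax state at blocksize 64: ~0.2 GiB

print(f"4-bit weights      ~{weights_gib:.2f} GiB")
print(f"quant state        ~{quant_state_gib:.2f} GiB")
print("GPU in the log:     3.82 GiB total, ~3.0 GiB already allocated at failure")

That squares with the 3869MiB / 3910MiB reading in the commit message: the weights alone nearly fill the card before LoRA adapters, activations or optimizer state are allocated, so only a bigger GPU, CPU/disk offload, or a smaller model resolves it.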
@@ -0,0 +1,27 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train',
+  valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002,
+  num_train_epochs=3, train_batch_size=12, warmup_ratio=0.1, gradient_accumulation_steps=1,
+  optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42,
+  add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05,
+  logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1,
+  save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024,
+  repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None,
+  backend='default', username=None, use_flash_attention_2=False, func=<function run_llm_command_factory at 0x7fa70ed86520>)
+> INFO loading dataset from csv
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
+    model = cls(config, *model_args, **model_kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+TypeError: LlamaForCausalLM.__init__() got an unexpected keyword argument 'use_flash_attention_2'
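This third failure is not memory-related. The traceback goes through transformers/models/auto/auto_factory.py line 493 and modeling_utils.py line 2700, i.e. an older transformers build than the line 565 / 3303 frames in the two OOM runs above, and that older release does not understand the use_flash_attention_2 keyword autotrain forwards from its Params. Reinstalling transformers@main (as the setup log does) is the straightforward fix; a defensive sketch for mixed environments, where the 4.34.0 cutoff is an assumption:

import transformers
from packaging import version
from transformers import AutoModelForCausalLM

kwargs = {"use_flash_attention_2": False}   # the flag autotrain forwards

# drop the flag on transformers builds that predate it (assumed cutoff ~4.34.0)
if version.parse(transformers.__version__) < version.parse("4.34.0"):
    kwargs.pop("use_flash_attention_2", None)

model = AutoModelForCausalLM.from_pretrained("abhishek/llama-2-7b-hf-small-shards", **kwargs)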
@@ -0,0 +1,41 @@
+{
+  "model": "abhishek/llama-2-7b-hf-small-shards",
+  "data_path": ".",
+  "project_name": "llm_1a",
+  "train_split": "train",
+  "valid_split": null,
+  "text_column": "text",
+  "token": null,
+  "lr": 0.0002,
+  "epochs": 3,
+  "batch_size": 2,
+  "warmup_ratio": 0.1,
+  "gradient_accumulation": 1,
+  "optimizer": "adamw_torch",
+  "scheduler": "linear",
+  "weight_decay": 0.0,
+  "max_grad_norm": 1.0,
+  "seed": 42,
+  "add_eos_token": false,
+  "block_size": -1,
+  "use_peft": true,
+  "lora_r": 16,
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "logging_steps": -1,
+  "evaluation_strategy": "epoch",
+  "save_total_limit": 1,
+  "save_strategy": "epoch",
+  "auto_find_batch_size": false,
+  "fp16": false,
+  "push_to_hub": false,
+  "use_int8": false,
+  "model_max_length": 1024,
+  "repo_id": null,
+  "use_int4": true,
+  "trainer": "sft",
+  "target_modules": null,
+  "merge_adapter": false,
+  "username": null,
+  "use_flash_attention_2": false
+}
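The JSON above mirrors the Namespace from the batch_size=2 run (lr 0.0002, 4-bit load via use_int4, LoRA r=16 / alpha=32, model_max_length 1024). A small sketch for reading it back and checking the effective batch size; the training_params.json name and the llm_1a project directory are assumptions about where autotrain wrote this file:

import json
from pathlib import Path

# adjust the path to wherever autotrain saved this config for the llm_1a project
cfg = json.loads(Path("llm_1a/training_params.json").read_text())

effective_batch = cfg["batch_size"] * cfg["gradient_accumulation"]   # 2 * 1 = 2
print("effective batch size:", effective_batch)
print("4-bit requested:", cfg["use_int4"], "| fp16:", cfg["fp16"])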

src/LLAMA_v2/log_files/utils.py (+6)

@@ -0,0 +1,6 @@
+
+
+import pandas as pd
+df = pd.read_parquet('train-00000-of-00001-a09b74b3ef9c3b56.parquet')
+# source split: train-00000-of-00001-a09b74b3ef9c3b56; export to the train.csv that autotrain loads
+df.to_csv('train.csv', index=False)
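The failing runs were launched with text_column='text' and load the dataset from CSV, so a quick check (a sketch; the parquet's column layout is not shown in this commit) that the exported train.csv actually exposes that column:

import pandas as pd

df = pd.read_csv("train.csv")
assert "text" in df.columns, f"expected a 'text' column, got {list(df.columns)}"
print(df.shape[0], "rows; median text length:", int(df["text"].str.len().median()))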
