diff --git a/src/LLAMA_v2/log_files/init_installs_log_1.log b/src/LLAMA_v2/log_files/init_installs_log_1.log
index 0b2e814..0836ee3 100644
--- a/src/LLAMA_v2/log_files/init_installs_log_1.log
+++ b/src/LLAMA_v2/log_files/init_installs_log_1.log
@@ -2,6 +2,8 @@ conda activate env_hface
+https://huggingface.co/abhishek/llama-2-7b-hf-small-shards
+
 https://github.com/huggingface/autotrain-advanced
 /home/dhankar/temp/08_23/a___main_tutes_DL/a__LLM/Abhi_Thakur/orig_auto_train/autotrain-advanced
diff --git a/src/LLAMA_v2/log_files/term_log__setup__10_02__1930h__SETUPLogs__.log b/src/LLAMA_v2/log_files/term_log__setup__10_02__1930h__SETUPLogs__.log
new file mode 100644
index 0000000..461ca08
--- /dev/null
+++ b/src/LLAMA_v2/log_files/term_log__setup__10_02__1930h__SETUPLogs__.log
@@ -0,0 +1,10 @@
+> INFO Installing latest transformers@main
+> INFO Successfully installed latest transformers
+> INFO Installing latest peft@main
+> INFO Successfully installed latest peft
+> INFO Installing latest diffusers@main
+> INFO Successfully installed latest diffusers
+> INFO Installing latest trl@main
+> INFO Successfully installed latest trl
+> INFO Installing latest xformers
+> INFO Successfully installed latest xformers
diff --git a/src/LLAMA_v2/log_files/term_log__train_2_10_02__2130h___.log b/src/LLAMA_v2/log_files/term_log__train_2_10_02__2130h___.log
new file mode 100644
index 0000000..e6e9b9b
--- /dev/null
+++ b/src/LLAMA_v2/log_files/term_log__train_2_10_02__2130h___.log
@@ -0,0 +1,48 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train', valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002, num_train_epochs=3, train_batch_size=3, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024, repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, use_flash_attention_2=False, func=)
+> INFO loading dataset from csv
+Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
+Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+Loading checkpoint shards: 0%| | 0/10 [00:00<?, ?it/s]
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 565, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3303, in from_pretrained
+    ) = cls._load_pretrained_model(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3691, in _load_pretrained_model
+    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
+                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 749, in _load_state_dict_into_meta_model
+    set_module_quantized_tensor_to_device(
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/integrations/bitsandbytes.py", line 98, in set_module_quantized_tensor_to_device
+    new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
+                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 179, in to
+    return self.cuda(device)
+           ^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 157, in cuda
+    w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
+                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/functional.py", line 816, in quantize_4bit
+    out = torch.zeros(((n+1)//2, 1), dtype=torch.uint8, device=A.device)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 22.00 MiB (GPU 0; 3.82 GiB total capacity; 2.98 GiB already allocated; 20.50 MiB free; 3.05 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
+
diff --git a/src/LLAMA_v2/log_files/term_log__train_3_.log b/src/LLAMA_v2/log_files/term_log__train_3_.log
new file mode 100644
index 0000000..c4304d7
--- /dev/null
+++ b/src/LLAMA_v2/log_files/term_log__train_3_.log
@@ -0,0 +1,45 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train', valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002, num_train_epochs=3, train_batch_size=2, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024, repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, use_flash_attention_2=False, func=)
+> INFO loading dataset from csv
+Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
+Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+
+Loading checkpoint shards: 0%| | 0/10 [00:00<?, ?it/s]
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 565, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3303, in from_pretrained
+    ) = cls._load_pretrained_model(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3691, in _load_pretrained_model
+    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
+                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 749, in _load_state_dict_into_meta_model
+    set_module_quantized_tensor_to_device(
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/integrations/bitsandbytes.py", line 98, in set_module_quantized_tensor_to_device
+    new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
+                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 179, in to
+    return self.cuda(device)
+           ^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/bitsandbytes/nn/modules.py", line 156, in cuda
+    w = self.data.contiguous().half().cuda(device)
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 86.00 MiB (GPU 0; 3.82 GiB total capacity; 2.95 GiB already allocated; 40.75 MiB free; 3.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
+
diff --git a/src/LLAMA_v2/log_files/term_log__train__10_02__1900h_.log b/src/LLAMA_v2/log_files/term_log__train__10_02__1900h_.log
new file mode 100644
index 0000000..50eec21
--- /dev/null
+++ b/src/LLAMA_v2/log_files/term_log__train__10_02__1900h_.log
@@ -0,0 +1,27 @@
+> INFO Running LLM
+> INFO Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='.', train_split='train',
+valid_split=None, text_column='text', model='abhishek/llama-2-7b-hf-small-shards', learning_rate=0.0002,
+num_train_epochs=3, train_batch_size=12, warmup_ratio=0.1, gradient_accumulation_steps=1,
+optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42,
+add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05,
+logging_steps=-1, project_name='llm_1a', evaluation_strategy='epoch', save_total_limit=1,
+save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024,
+repo_id=None, use_int4=True, trainer='sft', target_modules=None, merge_adapter=False, token=None,
+backend='default', username=None, use_flash_attention_2=False, func=)
+> INFO loading dataset from csv
+> ERROR train has failed due to an exception:
+> ERROR Traceback (most recent call last):
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/utils.py", line 280, in wrapper
+    return func(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/autotrain/trainers/clm/__main__.py", line 122, in train
+    model = AutoModelForCausalLM.from_pretrained(
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
+    return model_class.from_pretrained(
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/dhankar/anaconda3/envs/env_hface/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
+    model = cls(config, *model_args, **model_kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+TypeError: LlamaForCausalLM.__init__() got an unexpected keyword argument 'use_flash_attention_2'
+
diff --git a/src/LLAMA_v2/log_files/training_params.json b/src/LLAMA_v2/log_files/training_params.json
new file mode 100644
index 0000000..739e2e6
--- /dev/null
+++ b/src/LLAMA_v2/log_files/training_params.json
@@ -0,0 +1,41 @@
+{
+    "model": "abhishek/llama-2-7b-hf-small-shards",
+    "data_path": ".",
+    "project_name": "llm_1a",
+    "train_split": "train",
+    "valid_split": null,
+    "text_column": "text",
+    "token": null,
+    "lr": 0.0002,
+    "epochs": 3,
+    "batch_size": 2,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "add_eos_token": false,
"block_size": -1, + "use_peft": true, + "lora_r": 16, + "lora_alpha": 32, + "lora_dropout": 0.05, + "logging_steps": -1, + "evaluation_strategy": "epoch", + "save_total_limit": 1, + "save_strategy": "epoch", + "auto_find_batch_size": false, + "fp16": false, + "push_to_hub": false, + "use_int8": false, + "model_max_length": 1024, + "repo_id": null, + "use_int4": true, + "trainer": "sft", + "target_modules": null, + "merge_adapter": false, + "username": null, + "use_flash_attention_2": false +} \ No newline at end of file diff --git a/src/LLAMA_v2/log_files/utils.py b/src/LLAMA_v2/log_files/utils.py new file mode 100644 index 0000000..3122edf --- /dev/null +++ b/src/LLAMA_v2/log_files/utils.py @@ -0,0 +1,6 @@ + + +import pandas as pd +df = pd.read_parquet('train-00000-of-00001-a09b74b3ef9c3b56.parquet') +#train-00000-of-00001-a09b74b3ef9c3b56 +df.to_csv('train.csv', index=False) \ No newline at end of file