|
45 | 45 | pass |
46 | 46 | from huggingface_hub import HfFileSystem |
47 | 47 | import importlib.util |
| 48 | +from ..device_type import ( |
| 49 | + is_hip, |
| 50 | + get_device_type, |
| 51 | + DEVICE_TYPE, |
| 52 | + DEVICE_TYPE_TORCH, |
| 53 | + DEVICE_COUNT, |
| 54 | + ALLOW_PREQUANTIZED_MODELS, |
| 55 | +) |
48 | 56 |
|
49 | 57 | # https://github.com/huggingface/transformers/pull/26037 allows 4 bit loading! |
50 | 58 | from unsloth_zoo.utils import Version, _get_dtype |
@@ -195,6 +203,12 @@ def from_pretrained( |
195 | 203 | old_model_name = model_name |
196 | 204 | if not use_exact_model_name: |
197 | 205 | model_name = get_model_name(model_name, load_in_4bit) |
| 206 | + # Check if pre-quantized models are allowed |
 | 207 | + # e.g. AMD GPUs require blocksize = 128, but our pre-quantized models use blocksize = 64 |
| 208 | + if not ALLOW_PREQUANTIZED_MODELS and model_name.endswith(("-unsloth-bnb-4bit", "-bnb-4bit")): |
| 209 | + model_name = model_name.removesuffix("-unsloth-bnb-4bit") |
| 210 | + model_name = model_name.removesuffix("-bnb-4bit") |
| 211 | + pass |
198 | 212 |
|
199 | 213 | if USE_MODELSCOPE and not os.path.exists(model_name): |
200 | 214 | from modelscope import snapshot_download |
@@ -306,6 +320,12 @@ def from_pretrained( |
306 | 320 | model_name = peft_config.base_model_name_or_path |
307 | 321 | if not use_exact_model_name: |
308 | 322 | model_name = get_model_name(model_name, load_in_4bit) |
| 323 | + # Check if pre-quantized models are allowed |
 | 324 | + # e.g. AMD GPUs require blocksize = 128, but our pre-quantized models use blocksize = 64 |
| 325 | + if not ALLOW_PREQUANTIZED_MODELS and model_name.endswith(("-unsloth-bnb-4bit", "-bnb-4bit")): |
| 326 | + model_name = model_name.removesuffix("-unsloth-bnb-4bit") |
| 327 | + model_name = model_name.removesuffix("-bnb-4bit") |
| 328 | + pass |
309 | 329 | model_config = AutoConfig.from_pretrained( |
310 | 330 | model_name, |
311 | 331 | token = token, |
@@ -618,6 +638,12 @@ def from_pretrained( |
618 | 638 | old_model_name = model_name |
619 | 639 | if not use_exact_model_name: |
620 | 640 | model_name = get_model_name(model_name, load_in_4bit) |
| 641 | + # Check if pre-quantized models are allowed |
 | 642 | + # e.g. AMD GPUs require blocksize = 128, but our pre-quantized models use blocksize = 64 |
| 643 | + if not ALLOW_PREQUANTIZED_MODELS and model_name.endswith(("-unsloth-bnb-4bit", "-bnb-4bit")): |
| 644 | + model_name = model_name.removesuffix("-unsloth-bnb-4bit") |
| 645 | + model_name = model_name.removesuffix("-bnb-4bit") |
| 646 | + pass |
621 | 647 |
|
622 | 648 | # Check modelscope |
623 | 649 | if USE_MODELSCOPE and not os.path.exists(model_name): |
@@ -833,7 +859,12 @@ def from_pretrained( |
833 | 859 | model_name = peft_config.base_model_name_or_path |
834 | 860 | if not use_exact_model_name: |
835 | 861 | model_name = get_model_name(model_name, load_in_4bit) |
836 | | - |
| 862 | + # Check if pre-quantized models are allowed |
 | 863 | + # e.g. AMD GPUs require blocksize = 128, but our pre-quantized models use blocksize = 64 |
| 864 | + if not ALLOW_PREQUANTIZED_MODELS and model_name.endswith(("-unsloth-bnb-4bit", "-bnb-4bit")): |
| 865 | + model_name = model_name.removesuffix("-unsloth-bnb-4bit") |
| 866 | + model_name = model_name.removesuffix("-bnb-4bit") |
| 867 | + pass |
837 | 868 | model_config = AutoConfig.from_pretrained( |
838 | 869 | model_name, |
839 | 870 | token = token, |
|
0 commit comments