|
| 1 | +<!--Copyright 2022 The HuggingFace Team. All rights reserved. |
| 2 | + |
| 3 | +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with |
| 4 | +the License. You may obtain a copy of the License at |
| 5 | + |
| 6 | +http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | + |
| 8 | +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on |
| 9 | +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the |
| 10 | +specific language governing permissions and limitations under the License. |
| 11 | +--> |
| 12 | + |
| 13 | +# AutoClassλ‘ μ¬μ νμ΅λ μΈμ€ν΄μ€ λ‘λ[[Load pretrained instances with an AutoClass]] |
| 14 | + |
| 15 | +νΈλμ€ν¬λ¨Έ μν€ν
μ²κ° λ§€μ° λ€μνκΈ° λλ¬Έμ 체ν¬ν¬μΈνΈμ λ§λ μν€ν
μ²λ₯Ό μμ±νλ κ²μ΄ μ΄λ €μΈ μ μμ΅λλ€. λΌμ΄λΈλ¬λ¦¬λ₯Ό μ½κ³ κ°λ¨νλ©° μ μ°νκ² μ¬μ©νκΈ° μν Transformer ν΅μ¬ μ² νμ μΌνμΌλ‘, `AutoClass`λ μ£Όμ΄μ§ 체ν¬ν¬μΈνΈμμ μ¬λ°λ₯Έ μν€ν
μ²λ₯Ό μλμΌλ‘ μΆλ‘ νμ¬ λ‘λν©λλ€. `from_pretrained()` λ©μλλ₯Ό μ¬μ©νλ©΄ λͺ¨λ μν€ν
μ²μ λν΄ μ¬μ νμ΅λ λͺ¨λΈμ λΉ λ₯΄κ² λ‘λν μ μμΌλ―λ‘ λͺ¨λΈμ μ²μλΆν° νμ΅νλ λ° μκ°κ³Ό 리μμ€λ₯Ό ν¬μ
ν νμκ° μμ΅λλ€. μ΄λ¬ν μ νμ 체ν¬ν¬μΈνΈμ ꡬμ λ°μ§ μλ μ½λλ₯Ό μμ±νλ€λ κ²μ μ½λκ° ν 체ν¬ν¬μΈνΈμμ μλνλ€λ©΄ μν€ν
μ²κ° λ€λ₯΄λλΌλ μ μ¬ν μμ
μ λν΄ νμ΅λ κ²μ΄λΌλ©΄ λ€λ₯Έ 체ν¬ν¬μΈνΈμμλ μλνλ€λ κ²μ μλ―Έν©λλ€. |
| 16 | + |
| 17 | +<Tip> |
| 18 | + |
| 19 | +μν€ν
μ²λ λͺ¨λΈμ 골격μ μλ―Ένλ©° 체ν¬ν¬μΈνΈλ μ£Όμ΄μ§ μν€ν
μ²μ λν κ°μ€μΉμ
λλ€. μλ₯Ό λ€μ΄, [BERT](https://huggingface.co/bert-base-uncased)λ μν€ν
μ²μ΄κ³ , `bert-base-uncased`λ 체ν¬ν¬μΈνΈμ
λλ€. λͺ¨λΈμ μν€ν
μ² λλ 체ν¬ν¬μΈνΈλ₯Ό μλ―Έν μ μλ μΌλ°μ μΈ μ©μ΄μ
λλ€. |
| 20 | + |
| 21 | +</Tip> |
| 22 | + |
| 23 | +μ΄ νν 리μΌμμλ λ€μμ νμ΅ν©λλ€: |
| 24 | + |
| 25 | +* 사전 학습된 토크나이저 로드하기.
| 26 | +* 사전 학습된 이미지 프로세서 로드하기.
| 27 | +* 사전 학습된 특징 추출기 로드하기.
| 28 | +* 사전 학습된 프로세서 로드하기.
| 29 | +* 사전 학습된 모델 로드하기.
| 30 | + |
| 31 | +## AutoTokenizer |
| 32 | + |
| 33 | +κ±°μ λͺ¨λ NLP μμ
μ ν ν¬λμ΄μ λ‘ μμλ©λλ€. ν ν¬λμ΄μ λ μ¬μ©μμ μ
λ ₯μ λͺ¨λΈμμ μ²λ¦¬ν μ μλ νμμΌλ‘ λ³νν©λλ€. |
| 34 | +[`AutoTokenizer.from_pretrained`]λ‘ ν ν¬λμ΄μ λ₯Ό λ‘λν©λλ€: |
| 35 | + |
| 36 | +```py |
| 37 | +>>> from transformers import AutoTokenizer |
| 38 | + |
| 39 | +>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") |
| 40 | +``` |
| 41 | +
|
| 42 | +κ·Έλ¦¬κ³ λ€μ μλμ κ°μ΄ μ
λ ₯μ ν ν°νν©λλ€: |
| 43 | +
|
| 44 | +```py |
| 45 | +>>> sequence = "In a hole in the ground there lived a hobbit." |
| 46 | +>>> print(tokenizer(sequence)) |
| 47 | +{'input_ids': [101, 1999, 1037, 4920, 1999, 1996, 2598, 2045, 2973, 1037, 7570, 10322, 4183, 1012, 102], |
| 48 | + 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], |
| 49 | + 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} |
| 50 | +``` |
| 51 | + |
| 52 | +## AutoImageProcessor |
| 53 | + |
| 54 | +비전 작업의 경우 이미지 프로세서가 이미지를 올바른 입력 형식으로 처리합니다. [`AutoImageProcessor.from_pretrained`]로 이미지 프로세서를 로드합니다:
| 55 | + |
| 56 | +```py |
| 57 | +>>> from transformers import AutoImageProcessor |
| 58 | + |
| 59 | +>>> image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") |
| 60 | +``` |
| 61 | + |
| 62 | + |
| 63 | +## AutoFeatureExtractor |
| 64 | + |
| 65 | +μ€λμ€ μμ
μ κ²½μ° νΉμ§ μΆμΆκΈ°κ° μ€λμ€ μ νΈλ₯Ό μ¬λ°λ₯Έ μ
λ ₯ νμμΌλ‘ μ²λ¦¬ν©λλ€. |
| 66 | + |
| 67 | +[`AutoFeatureExtractor.from_pretrained`]λ‘ νΉμ§ μΆμΆκΈ°λ₯Ό λ‘λν©λλ€: |
| 68 | + |
| 69 | +```py |
| 70 | +>>> from transformers import AutoFeatureExtractor |
| 71 | + |
| 72 | +>>> feature_extractor = AutoFeatureExtractor.from_pretrained( |
| 73 | +... "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition" |
| 74 | +... ) |
| 75 | +``` |
| 76 | + |
| 77 | +## AutoProcessor |
| 78 | + |
| 79 | +멀티모달 작업에는 두 가지 유형의 전처리 도구를 결합한 프로세서가 필요합니다. 예를 들어 LayoutLMv2 모델에는 이미지를 처리하는 이미지 프로세서와 텍스트를 처리하는 토크나이저가 필요하며, 프로세서는 이 두 가지를 결합합니다.
| 80 | + |
| 81 | +[`AutoProcessor.from_pretrained()`]λ‘ νλ‘μΈμλ₯Ό λ‘λν©λλ€: |
| 82 | + |
| 83 | +```py |
| 84 | +>>> from transformers import AutoProcessor |
| 85 | + |
| 86 | +>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") |
| 87 | +``` |
| 88 | + |
| 89 | +## AutoModel |
| 90 | + |
| 91 | +<frameworkcontent> |
| 92 | +<pt> |
| 93 | +마지막으로 `AutoModelFor` 클래스를 사용하면 주어진 작업에 대해 사전 학습된 모델을 로드할 수 있습니다 (사용 가능한 작업의 전체 목록은 [여기](model_doc/auto)를 참조하세요). 예를 들어, [`AutoModelForSequenceClassification.from_pretrained`]를 사용하여 시퀀스 분류용 모델을 로드할 수 있습니다:
| 94 | + |
| 95 | +```py |
| 96 | +>>> from transformers import AutoModelForSequenceClassification |
| 97 | + |
| 98 | +>>> model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") |
| 99 | +``` |
| 100 | + |
| 101 | +λμΌν 체ν¬ν¬μΈνΈλ₯Ό μ½κ² μ¬μ¬μ©νμ¬ λ€λ₯Έ μμ
μ μν€ν
μ²λ₯Ό λ‘λν μ μμ΅λλ€: |
| 102 | + |
| 103 | +```py |
| 104 | +>>> from transformers import AutoModelForTokenClassification |
| 105 | + |
| 106 | +>>> model = AutoModelForTokenClassification.from_pretrained("distilbert-base-uncased") |
| 107 | +``` |
| 108 | + |
| 109 | +<Tip warning={true}> |
| 110 | + |
| 111 | +PyTorch 모델의 경우 `from_pretrained()` 메서드는 내부적으로 안전하지 않은 것으로 알려진 피클(pickle)을 사용하는 `torch.load()`를 사용합니다.
| 112 | +μΌλ°μ μΌλ‘ μ λ’°ν μ μλ μμ€μμ κ°μ Έμκ±°λ λ³μ‘°λμμ μ μλ λͺ¨λΈμ λ‘λνμ§ λ§μΈμ. νκΉ
νμ΄μ€ νλΈμμ νΈμ€ν
λλ κ³΅κ° λͺ¨λΈμ κ²½μ° μ΄λ¬ν 보μ μνμ΄ λΆλΆμ μΌλ‘ μνλλ©°, κ° μ»€λ° μ λ©μ¨μ΄λ₯Ό [κ²μ¬ν©λλ€](https://huggingface.co/docs/hub/security-malware). GPGλ₯Ό μ¬μ©ν΄ μλͺ
λ [μ»€λ° κ²μ¦](https://huggingface.co/docs/hub/security-gpg#signing-commits-with-gpg)κ³Ό κ°μ λͺ¨λ²μ¬λ‘λ [λ¬Έμ](https://huggingface.co/docs/hub/security)λ₯Ό μ°Έμ‘°νμΈμ. |
| 113 | + |
| 114 | +텐서플로와 Flax 체크포인트는 영향을 받지 않으며, `from_pretrained` 메서드의 `from_tf` 및 `from_flax` 키워드 인자를 사용하여 이 문제를 우회할 수 있습니다.
| 115 | + |
| 116 | +</Tip> |
| 117 | + |
| 118 | +μΌλ°μ μΌλ‘ AutoTokenizer ν΄λμ€μ AutoModelFor ν΄λμ€λ₯Ό μ¬μ©νμ¬ λ―Έλ¦¬ νμ΅λ λͺ¨λΈ μΈμ€ν΄μ€λ₯Ό λ‘λνλ κ²μ΄ μ’μ΅λλ€. μ΄λ κ² νλ©΄ λ§€λ² μ¬λ°λ₯Έ μν€ν
μ²λ₯Ό λ‘λν μ μμ΅λλ€. λ€μ [νν 리μΌ](preprocessing)μμλ μλ‘κ² λ‘λν ν ν¬λμ΄μ , μ΄λ―Έμ§ νλ‘μΈμ, νΉμ§ μΆμΆκΈ°λ₯Ό μ¬μ©νμ¬ λ―ΈμΈ νλμ© λ°μ΄ν° μΈνΈλ₯Ό μ μ²λ¦¬νλ λ°©λ²μ λν΄ μμλ΄
λλ€. |
| 119 | +</pt> |
| 120 | +<tf> |
| 121 | +마지막으로 `TFAutoModelFor` 클래스를 사용하면 주어진 작업에 대해 사전 학습된 모델을 로드할 수 있습니다 (사용 가능한 작업의 전체 목록은 [여기](model_doc/auto)를 참조하세요). 예를 들어, [`TFAutoModelForSequenceClassification.from_pretrained`]로 시퀀스 분류를 위한 모델을 로드합니다:
| 122 | + |
| 123 | +```py |
| 124 | +>>> from transformers import TFAutoModelForSequenceClassification |
| 125 | + |
| 126 | +>>> model = TFAutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") |
| 127 | +``` |
| 128 | + |
| 129 | +μ½κ² λμΌν 체ν¬ν¬μΈνΈλ₯Ό μ¬μ¬μ©νμ¬ λ€λ₯Έ μμ
μ μν€ν
μ²λ₯Ό λ‘λν μ μμ΅λλ€: |
| 130 | + |
| 131 | +```py |
| 132 | +>>> from transformers import TFAutoModelForTokenClassification |
| 133 | + |
| 134 | +>>> model = TFAutoModelForTokenClassification.from_pretrained("distilbert-base-uncased") |
| 135 | +``` |
| 136 | + |
| 137 | +μΌλ°μ μΌλ‘, `AutoTokenizer`ν΄λμ€μ `TFAutoModelFor` ν΄λμ€λ₯Ό μ¬μ©νμ¬ λ―Έλ¦¬ νμ΅λ λͺ¨λΈ μΈμ€ν΄μ€λ₯Ό λ‘λνλ κ²μ΄ μ’μ΅λλ€. μ΄λ κ² νλ©΄ λ§€λ² μ¬λ°λ₯Έ μν€ν
μ²λ₯Ό λ‘λν μ μμ΅λλ€. λ€μ [νν 리μΌ](preprocessing)μμλ μλ‘κ² λ‘λν ν ν¬λμ΄μ , μ΄λ―Έμ§ νλ‘μΈμ, νΉμ§ μΆμΆκΈ°λ₯Ό μ¬μ©νμ¬ λ―ΈμΈ νλμ© λ°μ΄ν° μΈνΈλ₯Ό μ μ²λ¦¬νλ λ°©λ²μ λν΄ μμλ΄
λλ€. |
| 138 | +</tf> |
| 139 | +</frameworkcontent> |
0 commit comments