@@ -846,8 +846,8 @@ def forward(
846
846
r"""
847
847
masked_lm_labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`):
848
848
Labels for computing the masked language modeling loss.
849
- Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
850
- Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
849
+ Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
850
+ Tokens with indices set to ``-1`` are ignored (masked), the loss is only computed for the tokens with labels
851
851
in ``[0, ..., config.vocab_size]``
852
852
next_sentence_label (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`):
853
853
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair (see :obj:`input_ids` docstring)
@@ -948,13 +948,13 @@ def forward(
948
948
r"""
949
949
masked_lm_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
950
950
Labels for computing the masked language modeling loss.
951
- Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
952
- Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
951
+ Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
952
+ Tokens with indices set to ``-1`` are ignored (masked), the loss is only computed for the tokens with labels
953
953
in ``[0, ..., config.vocab_size]``
954
954
lm_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
955
955
Labels for computing the left-to-right language modeling loss (next word prediction).
956
- Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
957
- Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels
956
+ Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
957
+ Tokens with indices set to ``-1`` are ignored (masked), the loss is only computed for the tokens with labels
958
958
in ``[0, ..., config.vocab_size]``
959
959
960
960
Returns:
@@ -1015,7 +1015,7 @@ def forward(
1015
1015
# 2. If `lm_labels` is provided we are in a causal scenario where we
1016
1016
# try to predict the next token for each input in the decoder.
1017
1017
if masked_lm_labels is not None :
1018
- loss_fct = CrossEntropyLoss () # -100 index = padding token
1018
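+ loss_fct = CrossEntropyLoss () # -1 index = padding token; NOTE(review): CrossEntropyLoss() defaults to ignore_index=-100, so -1 labels are skipped only if ignore_index=-1 is passed - confirm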
1019
1019
masked_lm_loss = loss_fct (prediction_scores .view (- 1 , self .config .vocab_size ), masked_lm_labels .view (- 1 ))
1020
1020
outputs = (masked_lm_loss ,) + outputs
1021
1021
0 commit comments