Skip to content

Commit 0b7d053

Browse files
author
Valentin
authored
Fixes some key names in the LayoutLMv2 / LayoutXLM tokenizers (#14493)
When padding_side is "left", a copy/paste error assigned the bbox data to the labels instead of the labels themselves.
1 parent 204d251 commit 0b7d053

File tree

4 files changed

+4
-4
lines changed

4 files changed

+4
-4
lines changed

src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ def _pad(
12751275
if "bbox" in encoded_inputs:
12761276
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
12771277
if "labels" in encoded_inputs:
1278-
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"]
1278+
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
12791279
if "special_tokens_mask" in encoded_inputs:
12801280
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
12811281
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ def _pad(
746746
if "bbox" in encoded_inputs:
747747
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
748748
if "labels" in encoded_inputs:
749-
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"]
749+
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
750750
if "special_tokens_mask" in encoded_inputs:
751751
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
752752
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

src/transformers/models/layoutxlm/tokenization_layoutxlm.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ def _pad(
10511051
if "bbox" in encoded_inputs:
10521052
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
10531053
if "labels" in encoded_inputs:
1054-
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"]
1054+
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
10551055
if "special_tokens_mask" in encoded_inputs:
10561056
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
10571057
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ def _pad(
614614
if "bbox" in encoded_inputs:
615615
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
616616
if "labels" in encoded_inputs:
617-
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"]
617+
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
618618
if "special_tokens_mask" in encoded_inputs:
619619
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
620620
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

0 commit comments

Comments
 (0)