We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
run_clm.py
1 parent 3663fca, commit b74a955 (Copy full SHA for b74a955)
examples/tensorflow/language-modeling/run_clm.py
@@ -347,10 +347,6 @@ def main():
347
column_names = raw_datasets["train"].column_names
348
text_column_name = "text" if "text" in column_names else column_names[0]
349
350
- # First we tokenize all the texts.
351
- column_names = raw_datasets["train"].column_names
352
- text_column_name = "text" if "text" in column_names else column_names[0]
353
-
354
def tokenize_function(examples):
355
return tokenizer(examples[text_column_name])
356
0 commit comments