42
42
)
43
43
from transformers .trainer_utils import get_last_checkpoint
44
44
from transformers .utils import check_min_version
45
+ from transformers .utils .versions import require_version
45
46
from utils_qa import postprocess_qa_predictions
46
47
47
48
48
49
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
49
50
check_min_version ("4.8.0.dev0" )
51
+ require_version ("datasets>=1.8.0" , "To fix: pip install -r examples/pytorch/question-answering/requirements.txt" )
50
52
51
53
logger = logging .getLogger (__name__ )
52
54
@@ -417,6 +419,7 @@ def prepare_train_features(examples):
417
419
num_proc = data_args .preprocessing_num_workers ,
418
420
remove_columns = column_names ,
419
421
load_from_cache_file = not data_args .overwrite_cache ,
422
+ desc = "Running tokenizer on train dataset" ,
420
423
)
421
424
if data_args .max_train_samples is not None :
422
425
# The number of samples might increase during feature creation, so we select only the specified max samples.
@@ -478,6 +481,7 @@ def prepare_validation_features(examples):
478
481
num_proc = data_args .preprocessing_num_workers ,
479
482
remove_columns = column_names ,
480
483
load_from_cache_file = not data_args .overwrite_cache ,
484
+ desc = "Running tokenizer on validation dataset" ,
481
485
)
482
486
if data_args .max_eval_samples is not None :
483
487
# During feature creation the number of dataset samples might increase, so we select the required samples again.
@@ -497,6 +501,7 @@ def prepare_validation_features(examples):
497
501
num_proc = data_args .preprocessing_num_workers ,
498
502
remove_columns = column_names ,
499
503
load_from_cache_file = not data_args .overwrite_cache ,
504
+ desc = "Running tokenizer on prediction dataset" ,
500
505
)
501
506
if data_args .max_predict_samples is not None :
502
507
# During feature creation the number of dataset samples might increase, so we select the required samples again.
0 commit comments