Skip to content

Commit d57da99

Browse files
authored
Add tests for no_trainer and fix existing examples (#16656)
* Fixed some bugs involving saving during epochs
* Added tests mimicking the existing examples tests
* Added JSON exporting to all `no_trainer` examples for consistency
1 parent ab22966 commit d57da99

File tree

11 files changed

+414
-22
lines changed

11 files changed

+414
-22
lines changed

.circleci/config.yml

+1
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ jobs:
587587
- run: pip install --upgrade pip
588588
- run: pip install .[sklearn,torch,sentencepiece,testing,torch-speech]
589589
- run: pip install -r examples/pytorch/_tests_requirements.txt
590+
- run: pip install git+https://github.com/huggingface/accelerate
590591
- save_cache:
591592
key: v0.4-torch_examples-{{ checksum "setup.py" }}
592593
paths:

examples/pytorch/language-modeling/run_clm_no_trainer.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
2424

2525
import argparse
26+
import json
2627
import logging
2728
import math
2829
import os
@@ -537,7 +538,10 @@ def group_texts(examples):
537538

538539
if isinstance(checkpointing_steps, int):
539540
if completed_steps % checkpointing_steps == 0:
540-
accelerator.save_state(f"step_{completed_steps}")
541+
output_dir = f"step_{completed_steps}"
542+
if args.output_dir is not None:
543+
output_dir = os.path.join(args.output_dir, output_dir)
544+
accelerator.save_state(output_dir)
541545

542546
if completed_steps >= args.max_train_steps:
543547
break
@@ -581,7 +585,10 @@ def group_texts(examples):
581585
)
582586

583587
if args.checkpointing_steps == "epoch":
584-
accelerator.save_state(f"epoch_{epoch}")
588+
output_dir = f"epoch_{epoch}"
589+
if args.output_dir is not None:
590+
output_dir = os.path.join(args.output_dir, output_dir)
591+
accelerator.save_state(output_dir)
585592

586593
if args.output_dir is not None:
587594
accelerator.wait_for_everyone()
@@ -592,6 +599,9 @@ def group_texts(examples):
592599
if args.push_to_hub:
593600
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
594601

602+
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
603+
json.dump({"perplexity": perplexity}, f)
604+
595605

596606
if __name__ == "__main__":
597607
main()

examples/pytorch/language-modeling/run_mlm_no_trainer.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# You can also adapt this script on your own mlm task. Pointers for this are left as comments.
2424

2525
import argparse
26+
import json
2627
import logging
2728
import math
2829
import os
@@ -457,9 +458,11 @@ def group_texts(examples):
457458
train_dataset = tokenized_datasets["train"]
458459
eval_dataset = tokenized_datasets["validation"]
459460

460-
# Log a few random samples from the training set:
461-
for index in random.sample(range(len(train_dataset)), 3):
462-
logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
461+
# Conditional for small test subsets
462+
if len(train_dataset) > 3:
463+
# Log a few random samples from the training set:
464+
for index in random.sample(range(len(train_dataset)), 3):
465+
logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
463466

464467
# Data collator
465468
# This one will take care of randomly masking the tokens.
@@ -581,7 +584,10 @@ def group_texts(examples):
581584

582585
if isinstance(checkpointing_steps, int):
583586
if completed_steps % checkpointing_steps == 0:
584-
accelerator.save_state(f"step_{completed_steps}")
587+
output_dir = f"step_{completed_steps}"
588+
if args.output_dir is not None:
589+
output_dir = os.path.join(args.output_dir, output_dir)
590+
accelerator.save_state(output_dir)
585591

586592
if completed_steps >= args.max_train_steps:
587593
break
@@ -625,7 +631,10 @@ def group_texts(examples):
625631
)
626632

627633
if args.checkpointing_steps == "epoch":
628-
accelerator.save_state(f"epoch_{epoch}")
634+
output_dir = f"epoch_{epoch}"
635+
if args.output_dir is not None:
636+
output_dir = os.path.join(args.output_dir, output_dir)
637+
accelerator.save_state(output_dir)
629638

630639
if args.output_dir is not None:
631640
accelerator.wait_for_everyone()
@@ -636,6 +645,9 @@ def group_texts(examples):
636645
if args.push_to_hub:
637646
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
638647

648+
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
649+
json.dump({"perplexity": perplexity}, f)
650+
639651

640652
if __name__ == "__main__":
641653
main()

examples/pytorch/multiple-choice/run_swag_no_trainer.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# You can also adapt this script on your own multiple choice task. Pointers for this are left as comments.
2020

2121
import argparse
22+
import json
2223
import logging
2324
import math
2425
import os
@@ -540,7 +541,10 @@ def preprocess_function(examples):
540541

541542
if isinstance(checkpointing_steps, int):
542543
if completed_steps % checkpointing_steps == 0:
543-
accelerator.save_state(f"step_{completed_steps}")
544+
output_dir = f"step_{completed_steps}"
545+
if args.output_dir is not None:
546+
output_dir = os.path.join(args.output_dir, output_dir)
547+
accelerator.save_state(output_dir)
544548

545549
if completed_steps >= args.max_train_steps:
546550
break
@@ -578,6 +582,12 @@ def preprocess_function(examples):
578582
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
579583
)
580584

585+
if args.checkpointing_steps == "epoch":
586+
output_dir = f"epoch_{epoch}"
587+
if args.output_dir is not None:
588+
output_dir = os.path.join(args.output_dir, output_dir)
589+
accelerator.save_state(output_dir)
590+
581591
if args.output_dir is not None:
582592
accelerator.wait_for_everyone()
583593
unwrapped_model = accelerator.unwrap_model(model)
@@ -586,6 +596,8 @@ def preprocess_function(examples):
586596
tokenizer.save_pretrained(args.output_dir)
587597
if args.push_to_hub:
588598
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
599+
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
600+
json.dump({"eval_accuracy": eval_metric["accuracy"]}, f)
589601

590602

591603
if __name__ == "__main__":

examples/pytorch/question-answering/run_qa_no_trainer.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# You can also adapt this script on your own question answering task. Pointers for this are left as comments.
2020

2121
import argparse
22+
import json
2223
import logging
2324
import math
2425
import os
@@ -783,11 +784,20 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
783784

784785
if isinstance(checkpointing_steps, int):
785786
if completed_steps % checkpointing_steps == 0:
786-
accelerator.save_state(f"step_{completed_steps}")
787+
output_dir = f"step_{completed_steps}"
788+
if args.output_dir is not None:
789+
output_dir = os.path.join(args.output_dir, output_dir)
790+
accelerator.save_state(output_dir)
787791

788792
if completed_steps >= args.max_train_steps:
789793
break
790794

795+
if args.checkpointing_steps == "epoch":
796+
output_dir = f"epoch_{epoch}"
797+
if args.output_dir is not None:
798+
output_dir = os.path.join(args.output_dir, output_dir)
799+
accelerator.save_state(output_dir)
800+
791801
if args.push_to_hub and epoch < args.num_train_epochs - 1:
792802
accelerator.wait_for_everyone()
793803
unwrapped_model = accelerator.unwrap_model(model)
@@ -879,9 +889,6 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
879889

880890
accelerator.log(log, step=completed_steps)
881891

882-
if args.checkpointing_steps == "epoch":
883-
accelerator.save_state(f"epoch_{epoch}")
884-
885892
if args.output_dir is not None:
886893
accelerator.wait_for_everyone()
887894
unwrapped_model = accelerator.unwrap_model(model)
@@ -890,6 +897,8 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
890897
tokenizer.save_pretrained(args.output_dir)
891898
if args.push_to_hub:
892899
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
900+
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
901+
json.dump({"eval_f1": eval_metric["f1"], "eval_exact": eval_metric["exact"]}, f)
893902

894903

895904
if __name__ == "__main__":

examples/pytorch/summarization/run_summarization_no_trainer.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# You can also adapt this script on your own summarization task. Pointers for this are left as comments.
2020

2121
import argparse
22+
import json
2223
import logging
2324
import math
2425
import os
@@ -602,7 +603,10 @@ def postprocess_text(preds, labels):
602603

603604
if isinstance(checkpointing_steps, int):
604605
if completed_steps % checkpointing_steps == 0:
605-
accelerator.save_state(f"step_{completed_steps}")
606+
output_dir = f"step_{completed_steps}"
607+
if args.output_dir is not None:
608+
output_dir = os.path.join(args.output_dir, output_dir)
609+
accelerator.save_state(output_dir)
606610

607611
if completed_steps >= args.max_train_steps:
608612
break
@@ -669,7 +673,10 @@ def postprocess_text(preds, labels):
669673
)
670674

671675
if args.checkpointing_steps == "epoch":
672-
accelerator.save_state(f"epoch_{epoch}")
676+
output_dir = f"epoch_{epoch}"
677+
if args.output_dir is not None:
678+
output_dir = os.path.join(args.output_dir, output_dir)
679+
accelerator.save_state(output_dir)
673680

674681
if args.output_dir is not None:
675682
accelerator.wait_for_everyone()
@@ -679,6 +686,16 @@ def postprocess_text(preds, labels):
679686
tokenizer.save_pretrained(args.output_dir)
680687
if args.push_to_hub:
681688
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
689+
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
690+
json.dump(
691+
{
692+
"eval_rouge1": result["rouge1"],
693+
"eval_rouge2": result["rouge2"],
694+
"eval_rougeL": result["rougeL"],
695+
"eval_rougeLsum": result["rougeLsum"],
696+
},
697+
f,
698+
)
682699

683700

684701
if __name__ == "__main__":

Comments (0)