19
19
import logging
20
20
import os
21
21
import shutil
22
- import subprocess
23
22
import sys
24
23
import tempfile
24
+ from unittest import mock
25
25
26
26
import torch
27
27
28
28
from accelerate .utils import write_basic_config
29
- from transformers .testing_utils import TestCasePlus , get_gpu_count , slow , torch_device
29
+ from transformers .testing_utils import TestCasePlus , get_gpu_count , run_command , slow , torch_device
30
30
from transformers .utils import is_apex_available
31
31
32
32
@@ -75,6 +75,7 @@ def setUpClass(cls):
75
75
def tearDownClass (cls ):
76
76
shutil .rmtree (cls .tmpdir )
77
77
78
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
78
79
def test_run_glue_no_trainer (self ):
79
80
tmp_dir = self .get_auto_remove_tmp_dir ()
80
81
testargs = f"""
@@ -94,12 +95,13 @@ def test_run_glue_no_trainer(self):
94
95
if is_cuda_and_apex_available ():
95
96
testargs .append ("--fp16" )
96
97
97
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
98
+ run_command (self ._launch_args + testargs )
98
99
result = get_results (tmp_dir )
99
100
self .assertGreaterEqual (result ["eval_accuracy" ], 0.75 )
100
101
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
101
102
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "glue_no_trainer" )))
102
103
104
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
103
105
def test_run_clm_no_trainer (self ):
104
106
tmp_dir = self .get_auto_remove_tmp_dir ()
105
107
testargs = f"""
@@ -120,12 +122,13 @@ def test_run_clm_no_trainer(self):
120
122
# Skipping because there are not enough batches to train the model + would need a drop_last to work.
121
123
return
122
124
123
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
125
+ run_command (self ._launch_args + testargs )
124
126
result = get_results (tmp_dir )
125
127
self .assertLess (result ["perplexity" ], 100 )
126
128
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
127
129
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "clm_no_trainer" )))
128
130
131
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
129
132
def test_run_mlm_no_trainer (self ):
130
133
tmp_dir = self .get_auto_remove_tmp_dir ()
131
134
testargs = f"""
@@ -139,12 +142,13 @@ def test_run_mlm_no_trainer(self):
139
142
--with_tracking
140
143
""" .split ()
141
144
142
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
145
+ run_command (self ._launch_args + testargs )
143
146
result = get_results (tmp_dir )
144
147
self .assertLess (result ["perplexity" ], 42 )
145
148
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
146
149
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "mlm_no_trainer" )))
147
150
151
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
148
152
def test_run_ner_no_trainer (self ):
149
153
# with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
150
154
epochs = 7 if get_gpu_count () > 1 else 2
@@ -165,13 +169,14 @@ def test_run_ner_no_trainer(self):
165
169
--with_tracking
166
170
""" .split ()
167
171
168
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
172
+ run_command (self ._launch_args + testargs )
169
173
result = get_results (tmp_dir )
170
174
self .assertGreaterEqual (result ["eval_accuracy" ], 0.75 )
171
175
self .assertLess (result ["train_loss" ], 0.5 )
172
176
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
173
177
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "ner_no_trainer" )))
174
178
179
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
175
180
def test_run_squad_no_trainer (self ):
176
181
tmp_dir = self .get_auto_remove_tmp_dir ()
177
182
testargs = f"""
@@ -190,14 +195,15 @@ def test_run_squad_no_trainer(self):
190
195
--with_tracking
191
196
""" .split ()
192
197
193
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
198
+ run_command (self ._launch_args + testargs )
194
199
result = get_results (tmp_dir )
195
200
# Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
196
201
self .assertGreaterEqual (result ["eval_f1" ], 28 )
197
202
self .assertGreaterEqual (result ["eval_exact" ], 28 )
198
203
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
199
204
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "qa_no_trainer" )))
200
205
206
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
201
207
def test_run_swag_no_trainer (self ):
202
208
tmp_dir = self .get_auto_remove_tmp_dir ()
203
209
testargs = f"""
@@ -214,12 +220,13 @@ def test_run_swag_no_trainer(self):
214
220
--with_tracking
215
221
""" .split ()
216
222
217
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
223
+ run_command (self ._launch_args + testargs )
218
224
result = get_results (tmp_dir )
219
225
self .assertGreaterEqual (result ["eval_accuracy" ], 0.8 )
220
226
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "swag_no_trainer" )))
221
227
222
228
@slow
229
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
223
230
def test_run_summarization_no_trainer (self ):
224
231
tmp_dir = self .get_auto_remove_tmp_dir ()
225
232
testargs = f"""
@@ -237,7 +244,7 @@ def test_run_summarization_no_trainer(self):
237
244
--with_tracking
238
245
""" .split ()
239
246
240
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
247
+ run_command (self ._launch_args + testargs )
241
248
result = get_results (tmp_dir )
242
249
self .assertGreaterEqual (result ["eval_rouge1" ], 10 )
243
250
self .assertGreaterEqual (result ["eval_rouge2" ], 2 )
@@ -247,6 +254,7 @@ def test_run_summarization_no_trainer(self):
247
254
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "summarization_no_trainer" )))
248
255
249
256
@slow
257
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
250
258
def test_run_translation_no_trainer (self ):
251
259
tmp_dir = self .get_auto_remove_tmp_dir ()
252
260
testargs = f"""
@@ -268,7 +276,7 @@ def test_run_translation_no_trainer(self):
268
276
--with_tracking
269
277
""" .split ()
270
278
271
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
279
+ run_command (self ._launch_args + testargs )
272
280
result = get_results (tmp_dir )
273
281
self .assertGreaterEqual (result ["eval_bleu" ], 30 )
274
282
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "epoch_0" )))
@@ -292,10 +300,11 @@ def test_run_semantic_segmentation_no_trainer(self):
292
300
--checkpointing_steps epoch
293
301
""" .split ()
294
302
295
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
303
+ run_command (self ._launch_args + testargs )
296
304
result = get_results (tmp_dir )
297
305
self .assertGreaterEqual (result ["eval_overall_accuracy" ], 0.10 )
298
306
307
+ @mock .patch .dict (os .environ , {"WANDB_MODE" : "offline" })
299
308
def test_run_image_classification_no_trainer (self ):
300
309
tmp_dir = self .get_auto_remove_tmp_dir ()
301
310
testargs = f"""
@@ -316,9 +325,9 @@ def test_run_image_classification_no_trainer(self):
316
325
if is_cuda_and_apex_available ():
317
326
testargs .append ("--fp16" )
318
327
319
- _ = subprocess . run (self ._launch_args + testargs , stdout = subprocess . PIPE )
328
+ run_command (self ._launch_args + testargs )
320
329
result = get_results (tmp_dir )
321
330
# The base model scores a 25%
322
- self .assertGreaterEqual (result ["eval_accuracy" ], 0.625 )
331
+ self .assertGreaterEqual (result ["eval_accuracy" ], 0.6 )
323
332
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "step_1" )))
324
333
self .assertTrue (os .path .exists (os .path .join (tmp_dir , "image_classification_no_trainer" )))
0 commit comments