Commit 324f361

Fix saving TF custom models (#7291)
* Fix #7277
* Apply style
* Add a full training pipeline test
* Apply style
1 parent cd9a058 commit 324f361

File tree

3 files changed: +77, -14 lines changed


src/transformers/modeling_tf_utils.py (+13, -13)

@@ -85,20 +85,20 @@ def keras_serializable(cls):
 
     @functools.wraps(initializer)
    def wrapped_init(self, *args, **kwargs):
-        transformers_config = kwargs.pop("transformers_config", None)
-        config = args[0] if args and isinstance(args[0], PretrainedConfig) else kwargs.get("config", None)
-        if config is not None and transformers_config is not None:
-            raise ValueError("Must pass either `config` or `transformers_config`, not both")
-        elif config is not None:
-            # normal layer construction, call with unchanged args (config is already in there)
-            initializer(self, *args, **kwargs)
-        elif transformers_config is not None:
-            # Keras deserialization, convert dict to config
-            config = config_class.from_dict(transformers_config)
+        config = args[0] if args and isinstance(args[0], PretrainedConfig) else kwargs.pop("config", None)
+
+        if isinstance(config, dict):
+            config = config_class.from_dict(config)
             initializer(self, config, *args, **kwargs)
+        elif isinstance(config, PretrainedConfig):
+            if len(args) > 0:
+                initializer(self, *args, **kwargs)
+            else:
+                initializer(self, config, *args, **kwargs)
         else:
-            raise ValueError("Must pass either `config` (PretrainedConfig) or `transformers_config` (dict)")
-        self._transformers_config = config
+            raise ValueError("Must pass either `config` (PretrainedConfig) or `config` (dict)")
+
+        self._config = config
         self._kwargs = kwargs
 
     cls.__init__ = wrapped_init
@@ -109,7 +109,7 @@ def wrapped_init(self, *args, **kwargs):
 
     def get_config(self):
         cfg = super(cls, self).get_config()
-        cfg["transformers_config"] = self._transformers_config.to_dict()
+        cfg["config"] = self._config.to_dict()
         cfg.update(self._kwargs)
         return cfg
 
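In short, `wrapped_init` now accepts the config either as a `PretrainedConfig` (normal construction) or as the plain dict that Keras hands back during deserialization, since `get_config()` stores it under the `config` key. A minimal sketch of that round trip, assuming `TFBertMainLayer`/`BertConfig` as a concrete `@keras_serializable` layer (they are not part of this diff; the import path follows the v3.x module layout):

```python
# Sketch only: TFBertMainLayer / BertConfig are assumed stand-ins for any
# @keras_serializable main layer (transformers v3.x module path).
from transformers import BertConfig
from transformers.modeling_tf_bert import TFBertMainLayer

config = BertConfig(hidden_size=32, num_hidden_layers=2, num_attention_heads=2, intermediate_size=37)

layer = TFBertMainLayer(config)    # normal construction: config is a PretrainedConfig
cfg = layer.get_config()           # config is now serialized as a plain dict under the "config" key

restored = TFBertMainLayer(**cfg)  # Keras-style re-instantiation: the dict branch rebuilds the config
assert restored._config.to_dict() == config.to_dict()
```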

tests/test_modeling_tf_common.py (+63)

@@ -354,6 +354,69 @@ def test_pt_tf_model_equivalence(self):
         max_diff = np.amax(np.abs(tfo - pto))
         self.assertLessEqual(max_diff, 4e-2)
 
+    def test_train_pipeline_custom_model(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        tf_main_layer_classes = set(
+            module_member
+            for model_class in self.all_model_classes
+            for module in (import_module(model_class.__module__),)
+            for module_member_name in dir(module)
+            if module_member_name.endswith("MainLayer")
+            for module_member in (getattr(module, module_member_name),)
+            if isinstance(module_member, type)
+            and tf.keras.layers.Layer in module_member.__bases__
+            and getattr(module_member, "_keras_serializable", False)
+        )
+
+        for main_layer_class in tf_main_layer_classes:
+            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
+            if "T5" in main_layer_class.__name__:
+                # Take the same values than in TFT5ModelTester for this shared layer
+                shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared")
+                config.use_cache = False
+                main_layer = main_layer_class(config, embed_tokens=shared)
+                del inputs_dict["use_cache"]
+            else:
+                main_layer = main_layer_class(config)
+
+            symbolic_inputs = {
+                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
+            }
+
+            if hasattr(self.model_tester, "num_labels"):
+                num_labels = self.model_tester.num_labels
+            else:
+                num_labels = 2
+
+            X = tf.data.Dataset.from_tensor_slices(
+                (inputs_dict, np.random.randint(0, num_labels, (self.model_tester.batch_size, 1)))
+            ).batch(1)
+
+            hidden_states = main_layer(symbolic_inputs)[0]
+            outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
+            model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
+
+            model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc"])
+            model.fit(X, epochs=1)
+
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                filepath = os.path.join(tmpdirname, "keras_model.h5")
+                model.save(filepath)
+                if "T5" in main_layer_class.__name__:
+                    model = tf.keras.models.load_model(
+                        filepath,
+                        custom_objects={
+                            main_layer_class.__name__: main_layer_class,
+                            "TFSharedEmbeddings": TFSharedEmbeddings,
+                        },
+                    )
+                else:
+                    model = tf.keras.models.load_model(
+                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
+                    )
+                assert isinstance(model, tf.keras.Model)
+                model(inputs_dict)
+
     def test_compile_tf_model(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
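The test loops over every `*MainLayer` class exposed by the model modules, but the pipeline it exercises is easier to read on one concrete layer: build a functional Keras model around the main layer, train briefly, save to HDF5, and reload with `custom_objects`. A standalone sketch under the same assumptions as above (`TFBertMainLayer`/`BertConfig` are illustrative stand-ins, not part of the test file):

```python
# Sketch of the pipeline the test exercises, using TFBertMainLayer as an assumed
# example of a @keras_serializable main layer (transformers v3.x module layout).
import os
import tempfile

import numpy as np
import tensorflow as tf
from transformers import BertConfig
from transformers.modeling_tf_bert import TFBertMainLayer

config = BertConfig(hidden_size=32, num_hidden_layers=2, num_attention_heads=2, intermediate_size=37)
main_layer = TFBertMainLayer(config)

# Wrap the main layer in a functional Keras model with a small classification head.
input_ids = tf.keras.Input(shape=(16,), dtype=tf.int32, name="input_ids")
sequence_output = main_layer(input_ids)[0]
logits = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(sequence_output[:, 0, :])
model = tf.keras.Model(inputs=input_ids, outputs=logits)

model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["acc"])
X = np.random.randint(0, config.vocab_size, (8, 16))
y = np.random.randint(0, 2, (8, 1))
model.fit(X, y, epochs=1, batch_size=4)

# Save and reload: this round trip is what the commit fixes.
with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, "keras_model.h5")
    model.save(path)
    restored = tf.keras.models.load_model(path, custom_objects={"TFBertMainLayer": TFBertMainLayer})
    assert isinstance(restored, tf.keras.Model)
```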

tests/test_modeling_tf_funnel.py (+1, -1)

@@ -327,7 +327,7 @@ def prepare_config_and_inputs_for_common(self):
 
 
 @require_tf
-class FunnelModelTest(TFModelTesterMixin, unittest.TestCase):
+class TFFunnelModelTest(TFModelTesterMixin, unittest.TestCase):
     all_model_classes = (
         (
             TFFunnelModel,
