     is_fairscale_available,
     is_optuna_available,
     is_ray_tune_available,
+    is_sigopt_available,
     run_hp_search_optuna,
     run_hp_search_ray,
+    run_hp_search_sigopt,
 )

 import numpy as np
@@ -231,9 +233,9 @@ class Trainer:
             A function that instantiates the model to be used. If provided, each call to
             :meth:`~transformers.Trainer.train` will start from a new instance of the model as given by this function.

-            The function may have zero argument, or a single one containing the optuna/Ray Tune trial object, to be
-            able to choose different architectures according to hyper parameters (such as layer count, sizes of inner
-            layers, dropout probabilities etc).
+            The function may have zero arguments, or a single one containing the optuna/Ray Tune/SigOpt trial object,
+            so it can choose different architectures according to hyperparameters (such as layer count, sizes of
+            inner layers, dropout probabilities, etc.).
         compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
             The function that will be used to compute metrics at evaluation. Must take a
             :class:`~transformers.EvalPrediction` and return a dictionary string to metric values.
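The updated docstring above says `model_init` may take the optuna/Ray Tune/SigOpt trial object. A minimal sketch of such a function, assuming a sequence-classification task and the `bert-base-uncased` checkpoint (both illustrative choices, not taken from this diff):

```python
from transformers import AutoModelForSequenceClassification

def model_init(trial=None):
    # `trial` is None on the first call and an optuna/Ray Tune/SigOpt trial object during a search;
    # it could be inspected here to pick architecture hyperparameters (dropout, layer sizes, ...).
    return AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
```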
@@ -869,6 +871,8 @@ def _hp_search_setup(self, trial: Union["optuna.Trial", Dict[str, Any]]):
         elif self.hp_search_backend == HPSearchBackend.RAY:
             params = trial
             params.pop("wandb", None)
+        elif self.hp_search_backend == HPSearchBackend.SIGOPT:
+            params = {k: int(v) if isinstance(v, str) else v for k, v in trial.assignments.items()}

         for key, value in params.items():
             if not hasattr(self.args, key):
@@ -883,6 +887,8 @@ def _hp_search_setup(self, trial: Union["optuna.Trial", Dict[str, Any]]):
             setattr(self.args, key, value)
         if self.hp_search_backend == HPSearchBackend.OPTUNA:
             logger.info("Trial:", trial.params)
+        if self.hp_search_backend == HPSearchBackend.SIGOPT:
+            logger.info(f"SigOpt Assignments: {trial.assignments}")
         if self.args.deepspeed:
             # Rebuild the deepspeed config to reflect the updated training parameters
             from transformers.deepspeed import HfDeepSpeedConfig
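In the SigOpt branch added in the two hunks above, `trial.assignments` is treated as a mapping of parameter names to values, and string values are cast to `int` before being written onto `self.args`. A small standalone sketch of that coercion, with made-up assignment values:

```python
# Hypothetical SigOpt assignments; names and values are illustrative only.
assignments = {"learning_rate": 3e-5, "num_train_epochs": "4", "seed": "17"}

# Same dict comprehension as the added line in _hp_search_setup: strings are assumed to be
# integer-valued hyperparameters and are cast; everything else passes through unchanged.
params = {k: int(v) if isinstance(v, str) else v for k, v in assignments.items()}
print(params)  # {'learning_rate': 3e-05, 'num_train_epochs': 4, 'seed': 17}
```

Note that this cast would fail on a string-valued categorical assignment; the added line assumes string assignments are always integers.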
@@ -1232,7 +1238,12 @@ def train(
         self.callback_handler.lr_scheduler = self.lr_scheduler
         self.callback_handler.train_dataloader = train_dataloader
         self.state.trial_name = self.hp_name(trial) if self.hp_name is not None else None
-        self.state.trial_params = hp_params(trial) if trial is not None else None
+        if trial is not None:
+            # Only SigOpt exposes the sampled hyperparameters via ``trial.assignments``; optuna passes a Trial and
+            # Ray Tune a plain dict, both of which ``hp_params`` already handles directly.
+            assignments = trial.assignments if self.hp_search_backend == HPSearchBackend.SIGOPT else trial
+            self.state.trial_params = hp_params(assignments)
+        else:
+            self.state.trial_params = None
         # This should be the same if the state has been saved but in case the training arguments changed, it's safer
         # to set this after the load.
         self.state.max_steps = max_steps
@@ -1524,10 +1530,12 @@ def _save_checkpoint(self, model, trial, metrics=None):
         if self.hp_search_backend is not None and trial is not None:
             if self.hp_search_backend == HPSearchBackend.OPTUNA:
                 run_id = trial.number
-            else:
+            elif self.hp_search_backend == HPSearchBackend.RAY:
                 from ray import tune

                 run_id = tune.get_trial_id()
+            elif self.hp_search_backend == HPSearchBackend.SIGOPT:
+                run_id = trial.id
             run_name = self.hp_name(trial) if self.hp_name is not None else f"run-{run_id}"
             run_dir = os.path.join(self.args.output_dir, run_name)
         else:
@@ -1671,9 +1679,9 @@ def hyperparameter_search(
         **kwargs,
     ) -> BestRun:
         """
-        Launch an hyperparameter search using ``optuna`` or ``Ray Tune``. The optimized quantity is determined by
-        :obj:`compute_objective`, which defaults to a function returning the evaluation loss when no metric is
-        provided, the sum of all metrics otherwise.
+        Launch a hyperparameter search using ``optuna``, ``Ray Tune``, or ``SigOpt``. The optimized quantity is
+        determined by :obj:`compute_objective`, which defaults to a function returning the evaluation loss when no
+        metric is provided, and the sum of all metrics otherwise.

         .. warning::

@@ -1686,7 +1694,8 @@ def hyperparameter_search(
             hp_space (:obj:`Callable[["optuna.Trial"], Dict[str, float]]`, `optional`):
                 A function that defines the hyperparameter search space. Will default to
                 :func:`~transformers.trainer_utils.default_hp_space_optuna` or
-                :func:`~transformers.trainer_utils.default_hp_space_ray` depending on your backend.
+                :func:`~transformers.trainer_utils.default_hp_space_ray` or
+                :func:`~transformers.trainer_utils.default_hp_space_sigopt` depending on your backend.
             compute_objective (:obj:`Callable[[Dict[str, float]], float]`, `optional`):
                 A function computing the objective to minimize or maximize from the metrics returned by the
                 :obj:`evaluate` method. Will default to :func:`~transformers.trainer_utils.default_compute_objective`.
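If a custom `hp_space` is passed with the SigOpt backend, it presumably has to return parameter definitions in the shape SigOpt's experiment-create endpoint expects, mirroring whatever `default_hp_space_sigopt` provides. A rough sketch under that assumption (parameter names, ranges, and the exact schema are not confirmed by this diff):

```python
def sigopt_hp_space(trial):
    # SigOpt-style parameter definitions (name / type / bounds); values here are illustrative.
    return [
        {"name": "learning_rate", "type": "double", "bounds": {"min": 1e-6, "max": 1e-4}},
        {"name": "num_train_epochs", "type": "int", "bounds": {"min": 1, "max": 5}},
    ]
```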
@@ -1697,8 +1706,8 @@ def hyperparameter_search(
                 pick :obj:`"minimize"` when optimizing the validation loss, :obj:`"maximize"` when optimizing one or
                 several metrics.
             backend(:obj:`str` or :class:`~transformers.training_utils.HPSearchBackend`, `optional`):
-                The backend to use for hyperparameter search. Will default to optuna or Ray Tune, depending on which
-                one is installed. If both are installed, will default to optuna.
+                The backend to use for hyperparameter search. Will default to optuna, Ray Tune, or SigOpt, depending
+                on which one is installed. If all are installed, will default to optuna.
             kwargs:
                 Additional keyword arguments passed along to :obj:`optuna.create_study` or :obj:`ray.tune.run`. For
                 more information see:
@@ -1707,6 +1716,7 @@ def hyperparameter_search(
                   <https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.create_study.html>`__
                 - the documentation of `tune.run
                   <https://docs.ray.io/en/latest/tune/api_docs/execution.html#tune-run>`__
+                - the documentation of `sigopt <https://app.sigopt.com/docs/endpoints/experiments/create>`__

         Returns:
             :class:`transformers.trainer_utils.BestRun`: All the information about the best run.
@@ -1718,6 +1728,7 @@ def hyperparameter_search(
                 "At least one of optuna or ray should be installed. "
                 "To install optuna run `pip install optuna`."
                 "To install ray run `pip install ray[tune]`."
+                "To install sigopt run `pip install sigopt`."
             )
         backend = HPSearchBackend(backend)
         if backend == HPSearchBackend.OPTUNA and not is_optuna_available():
@@ -1726,6 +1737,8 @@ def hyperparameter_search(
             raise RuntimeError(
                 "You picked the Ray Tune backend, but it is not installed. Use `pip install 'ray[tune]'`."
             )
+        if backend == HPSearchBackend.SIGOPT and not is_sigopt_available():
+            raise RuntimeError("You picked the sigopt backend, but it is not installed. Use `pip install sigopt`.")
         self.hp_search_backend = backend
         if self.model_init is None:
             raise RuntimeError(
@@ -1736,8 +1749,12 @@ def hyperparameter_search(
         self.hp_name = hp_name
         self.compute_objective = default_compute_objective if compute_objective is None else compute_objective

-        run_hp_search = run_hp_search_optuna if backend == HPSearchBackend.OPTUNA else run_hp_search_ray
-        best_run = run_hp_search(self, n_trials, direction, **kwargs)
+        backend_dict = {
+            HPSearchBackend.OPTUNA: run_hp_search_optuna,
+            HPSearchBackend.RAY: run_hp_search_ray,
+            HPSearchBackend.SIGOPT: run_hp_search_sigopt,
+        }
+        best_run = backend_dict[backend](self, n_trials, direction, **kwargs)

         self.hp_search_backend = None
         return best_run
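With the dispatch table in the last hunk, selecting the new backend comes down to passing `backend="sigopt"`. A minimal usage sketch, assuming `trainer` was built with `model_init` and an evaluation dataset, and that `sigopt` is installed and configured with an API token (all concrete values are illustrative):

```python
best_run = trainer.hyperparameter_search(
    backend="sigopt",
    n_trials=10,
    direction="minimize",  # e.g. minimize the evaluation loss
)
# BestRun carries the id, objective value, and hyperparameters of the best trial.
print(best_run.run_id, best_run.objective, best_run.hyperparameters)
```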