"""
Prediction task experiment tests for voting learners.
The main idea behind these tests is to not run every single possible
experiment but rather to simply confirm that the various options specified
in the configuration file call ``__init__()`` and ``predict()`` with the right
arguments. This is reasonable because those two methods are already
tested comprehensively in ``test_voting_learners_api_3.py``.
:author: Nitin Madnani (nmadnani@ets.org)
:organization: ETS
"""

import unittest
from itertools import product
from pathlib import Path
from unittest.mock import DEFAULT, patch

from skll.data import FeatureSet
from skll.experiments import run_configuration
from skll.learner.voting import VotingLearner
from skll.utils.testing import (
    BoolDict,
    config_dir,
    create_jsonlines_feature_files,
    fill_in_config_options_for_voting_learners,
    other_dir,
    output_dir,
    remove_jsonlines_feature_files,
    test_dir,
    train_dir,
)
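
# All of the tests below follow the same mock-and-verify pattern: the
# expensive `VotingLearner` work is patched out and only the call counts
# and call arguments are checked.  A simplified sketch of that pattern
# (not one of the actual test bodies; `expected_calls` is just an
# illustrative placeholder):
#
#     with patch.object(VotingLearner, "__init__", return_value=None) as mock_init:
#         run_configuration(config_path, quiet=True, local=True)
#         assert mock_init.call_count == expected_calls
#         assert mock_init.call_args[0] == (estimator_names,)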


class TestVotingLearnersExptsThree(unittest.TestCase):
    """Test class for third set of voting learner experiment tests."""

    @classmethod
    def setUpClass(cls):
        """Set up the tests."""
        for dir_path in [train_dir, test_dir, output_dir]:
            dir_path.mkdir(exist_ok=True)

        # create the training and test data files that we will use
        create_jsonlines_feature_files(train_dir)
        create_jsonlines_feature_files(test_dir)

    @classmethod
    def tearDownClass(cls):
        """Clean up after tests."""
        for output_file_path in output_dir.glob("test_voting_learner_predict*"):
            output_file_path.unlink()

        for output_file_path in Path(".").glob("test_voting_learner_predict*"):
            output_file_path.unlink()

        config_file_path = config_dir / "test_voting_learner_predict.cfg"
        config_file_path.unlink()

        remove_jsonlines_feature_files(train_dir)
        remove_jsonlines_feature_files(test_dir)

    def check_predict_task(self, learner_type, options_dict):
        """Check given combination of prediction configuration options."""
        # create a configuration file with the given options
        (
            config_path,
            estimator_names,
            job_name,
            custom_learner,
            objectives,
            _,
            model_kwargs_list,
            param_grid_list,
            sampler_list,
            _,
            _,
            _,
            _,
        ) = fill_in_config_options_for_voting_learners(learner_type, "predict", options_dict)

        # mock the `__init__()` method for the `VotingLearner` class so
        # that we can check that the voting learner was instantiated with
        # the right arguments; note that we are doing this mock separately
        # from all the others below since any `__init__` patch needs to
        # return a `None` value which the other ones do not
        init_patcher = patch.object(VotingLearner, "__init__", return_value=None)

        # mock the `from_file()` method for the `VotingLearner` class;
        # this method needs to return an instance of VotingLearner
        from_file_patcher = patch.object(
            VotingLearner, "from_file", return_value=VotingLearner(["SVC"])
        )

        # we also need to patch the `_create_learner_result_dicts()` function
        # since there are no actual results
        clrd_patcher = patch("skll.experiments._create_learner_result_dicts", return_value={})

        # we also need to patch some other output functions that write
        # various results to disk since we are not actually producing
        # any results from `evaluate()`
        output_patchers = patch.multiple(
            "skll.experiments", _print_fancy_output=DEFAULT, _write_summary_file=DEFAULT
        )

        mock_vl_init = init_patcher.start()
        mock_vl_from_file = from_file_patcher.start()
        clrd_patcher.start()
        output_patchers.start()

        # run the configuration file but with various methods/attributes for
        # `VotingLearner` mocked so that we can check that things were called
        # as expected without actually needing to evaluate any models
        with patch.multiple(
            VotingLearner, predict=DEFAULT, train=DEFAULT, save=DEFAULT, model=DEFAULT, create=True
        ) as mocks:
            run_configuration(
                config_path,
                overwrite=not options_dict["with_existing_model"],
                quiet=True,
                local=True,
            )

            # check that init was called the expected number of times;
            # if we are loading an existing model from disk, it should
            # never be called, otherwise it is called as many times as
            # there are objectives (number of featuresets is 1)
            if options_dict["with_existing_model"]:
                num_expected_init_calls = 0
            else:
                num_expected_init_calls = len(objectives) if options_dict["with_grid_search"] else 1
            self.assertEqual(mock_vl_init.call_count, num_expected_init_calls)

            # note that the init arguments are the same no matter the call
            expected_init_args = (estimator_names,)
            expected_init_kwargs = {
                "voting": "soft" if options_dict["with_soft_voting"] else "hard",
                "custom_learner_path": custom_learner,
                "feature_scaling": "none",
                "pos_label": None,
                "min_feature_count": 1,
                "model_kwargs_list": model_kwargs_list,
                "sampler_list": sampler_list,
                "sampler_kwargs_list": None,
            }

            # check that each init call had the expected arguments
            for actual_call in mock_vl_init.call_args_list:
                self.assertEqual(actual_call[0], expected_init_args)
                for key, expected_value in expected_init_kwargs.items():
                    actual_value = actual_call[1][key]
                    self.assertEqual(actual_value, expected_value)

            # we either trained a model via `train()` or used an existing
            # model via `from_file()`; check that they were called with
            # the expected arguments
            if not options_dict["with_existing_model"]:
                self.assertEqual(
                    mocks["train"].call_count,
                    len(objectives) if options_dict["with_grid_search"] else 1,
                )
                expected_train_kwargs = {
                    "param_grid_list": param_grid_list,
                    "grid_search_folds": 5,
                    "grid_search": options_dict["with_grid_search"],
                    "grid_jobs": None,
                    "shuffle": False,
                }
                for idx, actual_call in enumerate(mocks["train"].call_args_list):
                    actual_arg = actual_call[0][0]
                    self.assertTrue(isinstance(actual_arg, FeatureSet))
                    self.assertEqual(set(actual_arg.labels), {"cat", "dog"})
                    for key, expected_value in expected_train_kwargs.items():
                        actual_value = actual_call[1][key]
                        self.assertEqual(actual_value, expected_value)
                    # if we aren't doing grid search, then the objective should be `None`
                    self.assertEqual(
                        actual_call[1]["grid_objective"],
                        objectives[idx] if options_dict["with_grid_search"] else None,
                    )

                # if we trained a model, we also saved it
                self.assertEqual(
                    mocks["save"].call_count,
                    len(objectives) if options_dict["with_grid_search"] else 1,
                )
                self.assertEqual(mocks["save"].call_args[0][0], output_dir / f"{job_name}.model")
            else:
                self.assertEqual(mock_vl_from_file.call_count, 1)
                self.assertEqual(mock_vl_from_file.call_args[0][0], other_dir / f"{job_name}.model")

            # check that predict was called the expected number of times
            self.assertEqual(
                mocks["predict"].call_count,
                len(objectives) if options_dict["with_grid_search"] else 1,
            )

            # check that each predict call had the expected arguments
            expected_predict_kwargs = {
                "prediction_prefix": str(output_dir / job_name)
                if options_dict["with_prediction_prefix"]
                else job_name,
                "individual_predictions": options_dict["with_individual_predictions"],
                "class_labels": False,
            }
            for idx, actual_call in enumerate(mocks["predict"].call_args_list):
                actual_arg = actual_call[0][0]
                self.assertTrue(isinstance(actual_arg, FeatureSet))
                self.assertEqual(set(actual_arg.labels), {"cat", "dog"})
                for key, expected_value in expected_predict_kwargs.items():
                    actual_value = actual_call[1][key]
                    self.assertEqual(actual_value, expected_value)

        # stop all the manual patchers
        _ = output_patchers.stop()
        _ = clrd_patcher.stop()
        _ = from_file_patcher.stop()
        _ = init_patcher.stop()

    def test_predict_task(self):
        # test various combinations of experiment configuration options
        option_names = [
            "with_soft_voting",
            "with_model_kwargs_list",
            "with_grid_search",
            "with_existing_model",
            "with_prediction_prefix",
            "with_individual_predictions",
        ]

        for option_values in product(
            ["classifier", "regressor"],
            [False, True],
            [False, True],
            [False, True],
            [False, True],
            [False, True],
            [False, True],
        ):
            # assign the learner type separately
            learner_type = option_values[0]

            # create a dictionary for all the other options; we are using
            # a dictionary class that returns `False` for non-existent keys
            options_dict = BoolDict(zip(option_names, option_values[1:]))

            # voting regressors do not support soft voting
            if learner_type == "regressor" and options_dict["with_soft_voting"]:
                continue

            # if we are using an existing model, there will be no grid search
            if options_dict["with_existing_model"] and options_dict["with_grid_search"]:
                continue

            # run the check for this combination inside a subtest so that one
            # failing combination does not prevent the rest from running
            with self.subTest(learner_type=learner_type, **options_dict):
                self.check_predict_task(learner_type, options_dict)
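

# Optional convenience so the module can also be run on its own; this is a
# minimal sketch assuming a plain `python` invocation, not necessarily how
# the SKLL test suite itself is driven.
if __name__ == "__main__":
    unittest.main()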