-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathtest_examples.py
136 lines (114 loc) · 5.55 KB
/
test_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# License: BSD 3 clause
"""
Run the examples, just to make sure they are all still working.
:author: Jeremy Biggs (jbiggs@ets.org)
"""
import json
import subprocess
import unittest
from os import environ
from pathlib import Path, PurePath
from shutil import copyfile, copytree, rmtree
from skll.experiments import run_configuration
from skll.utils.testing import examples_dir, other_dir
# Paths to the example data shipped with the package ("old") and the
# scratch locations under `other_dir` ("new") that the tests copy
# everything into, so the shipped examples stay untouched.
_old_titanic_dir = examples_dir / "titanic"
_old_california_dir = examples_dir / "california"
_old_iris_dir = examples_dir / "iris"
_new_titanic_dir = other_dir / "titanic"
_new_california_dir = other_dir / "california"
_new_iris_dir = other_dir / "iris"
# if we are running the tests without activating the conda
# environment (as we do when testing the conda and TestPyPI
# packages), then we will usually pass in a BINDIR environment
# variable that points to where the environment's `bin` directory
# is located
# NOTE(review): when BINDIR is unset this becomes `Path("")`, which
# normalizes to `Path(".")` — and `Path` objects are always truthy,
# so any downstream `if _binary_dir` check can never take its else
# branch; verify callers that branch on this value.
_binary_dir = Path(environ.get("BINDIR", ""))
class TestExamples(unittest.TestCase):
    """Test class for running examples."""

    @classmethod
    def setUpClass(cls):
        """Create directories for testing, and copy files to new locations."""
        # Create the directories we need for california and iris;
        # if these directories already exist, it's fine
        for dir_path in [_new_iris_dir, _new_california_dir]:
            dir_path.mkdir(exist_ok=True)

        # We get rid of the new titanic directory, if it already exists,
        # because `copytree()` will raise an error if it already exists.
        # Note :: In Python 3.8, `copytree()` has a new argument,
        # `dirs_exist_ok`, which would render this step unnecessary.
        if _new_titanic_dir.exists():
            rmtree(_new_titanic_dir)

        # Copy the titanic data to our new directories
        copytree(_old_titanic_dir, _new_titanic_dir)

        # Pick the interpreter used to generate the example data. We must
        # check the BINDIR environment variable directly here: `_binary_dir`
        # is `Path("")` (i.e. `Path(".")`) when BINDIR is unset, and `Path`
        # objects are always truthy, so testing `_binary_dir` itself would
        # never fall back to the plain "python" branch.
        python_binary = _binary_dir / "python" if environ.get("BINDIR") else "python"

        # Create all of the data sets we need; use `check=True` so that a
        # failure to generate the example data raises here instead of
        # surfacing later as confusing test failures
        subprocess.run(
            [python_binary, examples_dir / "make_titanic_example_data.py"],
            cwd=_new_titanic_dir.parent,
            check=True,
        )
        subprocess.run(
            [python_binary, examples_dir / "make_california_example_data.py"],
            cwd=_new_california_dir.parent,
            check=True,
        )
        subprocess.run(
            [python_binary, examples_dir / "make_iris_example_data.py"],
            cwd=_new_iris_dir.parent,
            check=True,
        )

        # Move all the configuration files to our new directories
        for cfg_file in _old_titanic_dir.glob("*.cfg"):
            copyfile(cfg_file, _new_titanic_dir / cfg_file.name)
        for cfg_file in _old_california_dir.glob("*.cfg"):
            copyfile(cfg_file, _new_california_dir / cfg_file.name)
        for cfg_file in _old_iris_dir.glob("*.cfg"):
            copyfile(cfg_file, _new_iris_dir / cfg_file.name)

    @classmethod
    def tearDownClass(cls):
        """Clean up after tests, remove all directories we created."""
        for dir_path in [_new_iris_dir, _new_california_dir, _new_titanic_dir]:
            rmtree(dir_path)

    def run_configuration_and_check_outputs(self, config_path):
        """
        Run given configuration, and check JSON results against expected ones.

        Parameters
        ----------
        config_path : str
            Path to the SKLL experiment configuration file to run.
        """
        # run this experiment, get the `results_json_path`
        results_json_path = Path(run_configuration(config_path, local=True, quiet=True)[0])
        results_json_exp_path = other_dir / "expected" / PurePath(results_json_path).name

        # if the results path and expected results exist, check the output;
        # configurations without stored expectations are only smoke-tested
        if results_json_path.exists() and results_json_exp_path.exists():
            with open(results_json_path) as results_json_file:
                results_obj = json.load(results_json_file)[0]
            with open(results_json_exp_path) as results_json_exp_file:
                results_exp_obj = json.load(results_json_exp_file)[0]

            # we check a subset of the values, just to make sure
            # that nothing weird is going on with our output
            for key in [
                "train_set_size",
                "test_set_size",
                "learner_name",
                "cv_folds",
                "feature_scaling",
                "grid_score",
                "grid_objective",
                "accuracy",
                "score",
                "pearson",
            ]:
                # we obviously want to skip any keys that we aren't expecting
                if key in results_exp_obj:
                    actual = results_obj[key]
                    expected = results_exp_obj[key]

                    # if this is a float, then we check with less precision (2 decimals);
                    # otherwise, we check to make sure things are matching exactly
                    if isinstance(expected, float):
                        self.assertAlmostEqual(actual, expected, places=2)
                    else:
                        self.assertEqual(actual, expected)

    def test_titanic_configs(self):
        """Run all of the configuration files for the titanic example."""
        for config_path in _new_titanic_dir.glob("*.cfg"):
            self.run_configuration_and_check_outputs(str(config_path))

    def test_california_configs(self):
        """Run all of the configuration files for the california example."""
        for config_path in _new_california_dir.glob("*.cfg"):
            self.run_configuration_and_check_outputs(str(config_path))

    def test_iris_configs(self):
        """Run all of the configuration files for the iris example."""
        for config_path in _new_iris_dir.glob("*.cfg"):
            self.run_configuration_and_check_outputs(str(config_path))