forked from amueller/introduction_to_ml_with_python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_grid_search.py
93 lines (83 loc) · 4.06 KB
/
plot_grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import load_iris
import pandas as pd
def plot_cross_val_selection():
    """Plot per-fold and mean validation accuracy of an SVC grid search.

    Fits a 5-fold ``GridSearchCV`` over ``C`` and ``gamma`` for an ``SVC`` on
    a training split of the iris data. For every displayed parameter setting
    the individual fold accuracies and their mean are drawn, and the setting
    with the highest mean accuracy is circled. Only the settings from row 15
    of ``cv_results_`` onwards are displayed.

    The plot is drawn on a newly created matplotlib figure; nothing is
    returned.
    """
    n_folds = 5

    iris = load_iris()
    # Only the training part of the split is needed for the search.
    X_trainval, _, y_trainval, _ = train_test_split(
        iris.data, iris.target, random_state=0)

    param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                  'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
    grid_search = GridSearchCV(SVC(), param_grid, cv=n_folds,
                               return_train_score=True)
    grid_search.fit(X_trainval, y_trainval)

    # Keep only the rows from index 15 on; everything below (positions,
    # best index, tick labels) refers to this sliced frame.
    results = pd.DataFrame(grid_search.cv_results_)[15:]
    # Positional index (within the slice) of the best mean score.
    best = np.argmax(results.mean_test_score.values)

    plt.figure(figsize=(10, 3))
    plt.xlim(-1, len(results))
    plt.ylim(0, 1.1)

    # Column names of the per-fold scores; invariant across rows.
    split_columns = ['split%d_test_score' % fold for fold in range(n_folds)]
    for i, (_, row) in enumerate(results.iterrows()):
        scores = row[split_columns]
        marker_cv, = plt.plot([i] * n_folds, scores, '^', c='gray',
                              markersize=5, alpha=.5)
        marker_mean, = plt.plot(i, row.mean_test_score, 'v', c='none',
                                alpha=1, markersize=10, markeredgecolor='k')
        if i == best:
            marker_best, = plt.plot(i, row.mean_test_score, 'o', c='red',
                                    fillstyle="none", alpha=1, markersize=20,
                                    markeredgewidth=3)

    # Labels must come from the sliced frame so that each tick is labelled
    # with the parameters of the setting actually plotted at that position
    # (and so that the number of labels equals the number of ticks).
    tick_labels = [str(params).strip("{}").replace("'", "")
                   for params in results['params']]
    plt.xticks(range(len(results)), tick_labels, rotation=90)
    plt.ylabel("Validation accuracy")
    plt.xlabel("Parameter settings")
    plt.legend([marker_cv, marker_mean, marker_best],
               ["cv accuracy", "mean accuracy", "best parameter setting"],
               loc=(1.05, .4))
def plot_grid_search_overview():
    """Draw a box-and-arrow overview of the grid-search workflow.

    Creates a new figure with the axes frame and both axis lines hidden,
    then places labelled rounded boxes ("data set", "training data",
    "test data", "parameter grid", "cross-validation", "best parameters",
    "retrained model", "final evaluation") joined by arrows. Nothing is
    returned.
    """
    plt.figure(figsize=(10, 3), dpi=70)
    canvas = plt.gca()
    canvas.set_frame_on(False)
    for axis in (canvas.yaxis, canvas.xaxis):
        axis.set_visible(False)

    def add_box(ax, label, position, points_to=None):
        """Annotate ``ax`` with a boxed ``label`` at ``position``.

        When ``points_to`` (an earlier annotation) is given, the arrow ends
        at that annotation's position and is clipped against its box patch;
        otherwise the arrow starts and ends at ``position``.
        """
        has_target = points_to is not None
        arrow_tip = points_to.get_position() if has_target else position
        tip_patch = points_to.get_bbox_patch() if has_target else None
        arrow_style = dict(arrowstyle="-|>", fc="w", ec="k",
                           patchB=tip_patch,
                           connectionstyle="arc3,rad=0.0")
        box = ax.annotate(label, xy=arrow_tip, xytext=position,
                          xycoords='axes pixels',
                          textcoords='axes pixels', size=20,
                          arrowprops=arrow_style,
                          bbox=dict(boxstyle="round", fc="w"),
                          horizontalalignment="center",
                          verticalalignment="center")
        plt.draw()
        return box

    # Layout grid in axes pixels: horizontal spacing unit and top row height.
    unit = 100
    top = 400
    data_row = top - unit
    cv_row = top - 2 * unit
    result_row = top - 3 * unit
    training_pos = (2 * unit, data_row)
    dataset_pos = (3.5 * unit, top - 0.0)

    # Boxes are created target-first so each arrow has something to point at.
    final_evaluation = add_box(canvas, "final evaluation",
                               (5 * unit, result_row))
    retrained_model = add_box(canvas, "retrained model",
                              (3 * unit, result_row), final_evaluation)
    best_parameters = add_box(canvas, "best parameters",
                              (.5 * unit, result_row), retrained_model)
    cross_validation = add_box(canvas, "cross-validation",
                               (.5 * unit, cv_row), best_parameters)
    add_box(canvas, "parameter grid", (0.0, top - 0), cross_validation)

    # "training data" is drawn twice at the same spot to get two arrows.
    training_data = add_box(canvas, "training data", training_pos,
                            cross_validation)
    add_box(canvas, "training data", training_pos, retrained_model)
    test_data = add_box(canvas, "test data", (5 * unit, data_row),
                        final_evaluation)

    # Likewise "data set" feeds both the training and the test data boxes.
    add_box(canvas, "data set", dataset_pos, training_data)
    add_box(canvas, "data set", dataset_pos, test_data)

    plt.ylim(0, 1)
    plt.xlim(0, 1.5)