
Commit f188c47

update mglearn
1 parent 0fc51ea commit f188c47

8 files changed: +103 -50 lines changed

mglearn/plot_2d_separator.py

+12 -7
@@ -3,7 +3,8 @@
 from .plot_helpers import cm2, cm3, discrete_scatter
 
 
-def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None, alpha=1, cm=cm3):
+def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
+                           alpha=1, cm=cm3):
     # multiclass
     if eps is None:
         eps = X.std() / 2.
@@ -28,7 +29,8 @@ def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None, alpha=1
     ax.set_yticks(())
 
 
-def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis", function=None):
+def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
+                   function=None):
     # binary with fill
     if eps is None:
         eps = X.std() / 2.
@@ -44,7 +46,8 @@ def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis", func
     X1, X2 = np.meshgrid(xx, yy)
     X_grid = np.c_[X1.ravel(), X2.ravel()]
     if function is None:
-        function = getattr(classifier, "decision_function", getattr(classifier, "predict_proba"))
+        function = getattr(classifier, "decision_function",
+                           getattr(classifier, "predict_proba"))
     else:
         function = getattr(classifier, function)
     decision_values = function(X_grid)
@@ -63,7 +66,8 @@ def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis", func
 
 
 def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
-                      cm=cm2, linewidth=None, threshold=None, linestyle="solid"):
+                      cm=cm2, linewidth=None, threshold=None,
+                      linestyle="solid"):
     # binary?
     if eps is None:
         eps = X.std() / 2.
@@ -73,15 +77,16 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
 
     x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
     y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
-    xx = np.linspace(x_min, x_max, 100)
-    yy = np.linspace(y_min, y_max, 100)
+    xx = np.linspace(x_min, x_max, 1000)
+    yy = np.linspace(y_min, y_max, 1000)
 
     X1, X2 = np.meshgrid(xx, yy)
     X_grid = np.c_[X1.ravel(), X2.ravel()]
     try:
         decision_values = classifier.decision_function(X_grid)
         levels = [0] if threshold is None else [threshold]
-        fill_levels = [decision_values.min()] + levels + [decision_values.max()]
+        fill_levels = [decision_values.min()] + levels + [
+            decision_values.max()]
     except AttributeError:
         # no decision_function
         decision_values = classifier.predict_proba(X_grid)[:, 1]
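For context, a minimal driver for the reshaped plot_2d_separator — not part of this commit; the classifier and data are illustrative:

```python
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

from mglearn.plot_2d_separator import plot_2d_separator

# toy 2D problem and a fitted classifier (illustrative choices)
X, y = make_blobs(centers=2, random_state=42)
clf = SVC().fit(X, y)

# the finer 1000-point grid from this commit gives a smoother boundary contour
plot_2d_separator(clf, X, fill=True, alpha=.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20)
plt.show()
```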

mglearn/plot_cross_validation.py

+1 -1
@@ -7,7 +7,7 @@ def plot_group_kfold():
     groups = [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3]
 
     plt.figure(figsize=(10, 2))
-    plt.title("LabelKFold")
+    plt.title("GroupKFold")
 
     axes = plt.gca()
     axes.set_frame_on(False)
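The title fix tracks scikit-learn 0.18's rename of LabelKFold to GroupKFold. A minimal sketch of the renamed splitter — not part of this commit; estimator and data are arbitrary:

```python
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GroupKFold, cross_val_score

# 12 samples in 4 groups, mirroring the groups list in the plot
X, y = make_blobs(n_samples=12, random_state=0)
groups = [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3]
scores = cross_val_score(LogisticRegression(), X, y, groups=groups,
                         cv=GroupKFold(n_splits=3))
print(scores)  # one score per group-wise split
```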

mglearn/plot_dbscan.py

+10 -6
@@ -3,7 +3,7 @@
 from sklearn.cluster import DBSCAN
 from sklearn.datasets import make_blobs
 
-from .plot_helpers import discrete_scatter
+from .plot_helpers import discrete_scatter, cm3
 
 
 def plot_dbscan():
@@ -13,9 +13,10 @@ def plot_dbscan():
     clusters = dbscan.fit_predict(X)
     clusters
 
-    fig, axes = plt.subplots(3, 4, figsize=(11, 8), subplot_kw={'xticks': (), 'yticks': ()})
+    fig, axes = plt.subplots(3, 4, figsize=(11, 8),
+                             subplot_kw={'xticks': (), 'yticks': ()})
     # Plot clusters as red, green and blue, and outliers (-1) as white
-    colors = ['r', 'g', 'b']
+    colors = [cm3(1), cm3(0), cm3(2)]
     markers = ['o', '^', 'v']
 
     # iterate over settings of min_samples and eps
@@ -25,19 +26,22 @@ def plot_dbscan():
             dbscan = DBSCAN(min_samples=min_samples, eps=eps)
             # get cluster assignments
             clusters = dbscan.fit_predict(X)
-            print("min_samples: %d eps: %f cluster: %s" % (min_samples, eps, clusters))
+            print("min_samples: %d eps: %f cluster: %s"
+                  % (min_samples, eps, clusters))
             if np.any(clusters == -1):
                 c = ['w'] + colors
                 m = ['o'] + markers
             else:
                 c = colors
                 m = markers
-            discrete_scatter(X[:, 0], X[:, 1], clusters, ax=axes[i, j], c=c, s=8, markers=m)
+            discrete_scatter(X[:, 0], X[:, 1], clusters, ax=axes[i, j], c=c,
+                             s=8, markers=m)
             inds = dbscan.core_sample_indices_
             # vizualize core samples and clusters.
             if len(inds):
                 discrete_scatter(X[inds, 0], X[inds, 1], clusters[inds],
                                  ax=axes[i, j], s=15, c=colors,
                                  markers=markers)
-            axes[i, j].set_title("min_samples: %d eps: %.1f" % (min_samples, eps))
+            axes[i, j].set_title("min_samples: %d eps: %.1f"
+                                 % (min_samples, eps))
     fig.tight_layout()
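For reference, the points this plot draws in white are DBSCAN's noise samples, which fit_predict labels -1. A quick sketch — not part of this commit; parameters are illustrative:

```python
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=12, random_state=0)
clusters = DBSCAN(min_samples=3, eps=1.5).fit_predict(X)
print(np.unique(clusters))  # noise points, if any, show up as -1
```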

mglearn/plot_grid_search.py

+32 -20
@@ -16,26 +16,31 @@ def plot_cross_val_selection():
                   'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
     grid_search = GridSearchCV(SVC(), param_grid, cv=5)
     grid_search.fit(X_trainval, y_trainval)
-    results = pd.DataFrame(grid_search.results_)[15:]
+    results = pd.DataFrame(grid_search.cv_results_)[15:]
 
-    best = np.argmax(results.test_mean_score.values)
+    best = np.argmax(results.mean_test_score.values)
     plt.figure(figsize=(10, 3))
     plt.xlim(-1, len(results))
     plt.ylim(0, 1.1)
     for i, (_, row) in enumerate(results.iterrows()):
         scores = row[['test_split%d_score' % i for i in range(5)]]
-        marker_cv, = plt.plot([i] * 5, scores, '^', c='gray', markersize=5, alpha=.5)
-        marker_mean, = plt.plot(i, row.test_mean_score, 'v', c='none', alpha=1, markersize=10)
+        marker_cv, = plt.plot([i] * 5, scores, '^', c='gray', markersize=5,
+                              alpha=.5)
+        marker_mean, = plt.plot(i, row.mean_test_score, 'v', c='none', alpha=1,
+                                markersize=10)
         if i == best:
-            marker_best, = plt.plot(i, row.test_mean_score, 'o', c='red', fillstyle="none",
-                                    alpha=1, markersize=20, markeredgewidth=3)
+            marker_best, = plt.plot(i, row.mean_test_score, 'o', c='red',
+                                    fillstyle="none", alpha=1, markersize=20,
+                                    markeredgewidth=3)
 
-    plt.xticks(range(len(results)), [str(x).strip("{}").replace("'", "")
-                                     for x in grid_search.results_['params']], rotation=90)
+    plt.xticks(range(len(results)), [str(x).strip("{}").replace("'", "") for x
+                                     in grid_search.cv_results_['params']],
+               rotation=90)
     plt.ylabel("Validation accuracy")
     plt.xlabel("Parameter settings")
     plt.legend([marker_cv, marker_mean, marker_best],
-               ["cv accuracy", "mean accuracy", "best parameter setting"], loc=(1.05, .4))
+               ["cv accuracy", "mean accuracy", "best parameter setting"],
+               loc=(1.05, .4))
 
 
 def plot_grid_search_overview():
@@ -54,9 +59,10 @@ def draw(ax, text, start, target=None):
         patchB = None
         annotation = ax.annotate(text, end, start, xycoords='axes pixels',
                                  textcoords='axes pixels', size=20,
-                                 arrowprops=dict(arrowstyle="-|>", fc="w",
-                                                 ec="k", patchB=patchB,
-                                                 connectionstyle="arc3,rad=0.0"),
+                                 arrowprops=dict(
+                                     arrowstyle="-|>", fc="w", ec="k",
+                                     patchB=patchB,
+                                     connectionstyle="arc3,rad=0.0"),
                                  bbox=dict(boxstyle="round", fc="w"),
                                  horizontalalignment="center",
                                  verticalalignment="center")
@@ -66,15 +72,21 @@ def draw(ax, text, start, target=None):
     step = 100
     grr = 400
 
-    final_evaluation = draw(axes, "final evaluation", (5 * step, grr - 3 * step))
-    retrained_model = draw(axes, "retrained model", (3 * step, grr - 3 * step), final_evaluation)
-    best_parameters = draw(axes, "best parameters", (.5 * step, grr - 3 * step), retrained_model)
-    cross_validation = draw(axes, "cross-validation", (.5 * step, grr - 2 * step), best_parameters)
-    parameters = draw(axes, "parameter grid", (0.0, grr - 0), cross_validation)
-    training_data = draw(axes, "training data", (2 * step, grr - step), cross_validation)
+    final_evaluation = draw(axes, "final evaluation", (5 * step, grr - 3 *
+                                                       step))
+    retrained_model = draw(axes, "retrained model", (3 * step, grr - 3 * step),
+                           final_evaluation)
+    best_parameters = draw(axes, "best parameters", (.5 * step, grr - 3 *
+                                                     step), retrained_model)
+    cross_validation = draw(axes, "cross-validation", (.5 * step, grr - 2 *
+                                                       step), best_parameters)
+    draw(axes, "parameter grid", (0.0, grr - 0), cross_validation)
+    training_data = draw(axes, "training data", (2 * step, grr - step),
+                         cross_validation)
     draw(axes, "training data", (2 * step, grr - step), retrained_model)
-    test_data = draw(axes, "test data", (5 * step, grr - step), final_evaluation)
+    test_data = draw(axes, "test data", (5 * step, grr - step),
+                     final_evaluation)
     draw(axes, "data set", (3.5 * step, grr - 0.0), training_data)
-    data_set = draw(axes, "data set", (3.5 * step, grr - 0.0), test_data)
+    draw(axes, "data set", (3.5 * step, grr - 0.0), test_data)
     plt.ylim(0, 1)
     plt.xlim(0, 1.5)
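The results_ → cv_results_ renames track the attribute name that shipped in scikit-learn 0.18, along with its mean_test_score column. A sketch of reading it back — not part of this commit; estimator and grid are illustrative:

```python
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

iris = load_iris()
grid = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=5)
grid.fit(iris.data, iris.target)

# cv_results_ is a dict of arrays; pandas makes it easy to inspect
results = pd.DataFrame(grid.cv_results_)
print(results[['params', 'mean_test_score', 'rank_test_score']])
```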

mglearn/plot_helpers.py

+1 -1
@@ -4,7 +4,7 @@
 from matplotlib.colors import ListedColormap, colorConverter, LinearSegmentedColormap
 
 
-cm_cycle = ListedColormap(['#0000aa', '#ff2020', '#50ff50', 'c', '#fff000'])
+cm_cycle = ListedColormap(['#0000aa', '#ff5050', '#50ff50', '#9040a0', '#fff000'])
 cm3 = ListedColormap(['#0000aa', '#ff2020', '#50ff50'])
 cm2 = ListedColormap(['#0000aa', '#ff2020'])
 
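Elsewhere in this commit these colormaps are indexed directly (cm3(0), cm3(1), ...): calling a ListedColormap with an integer returns that entry as an RGBA tuple, which matplotlib accepts anywhere a color is expected. A quick check, not part of this commit:

```python
from matplotlib.colors import ListedColormap

cm3 = ListedColormap(['#0000aa', '#ff2020', '#50ff50'])
print(cm3(0))  # RGBA for '#0000aa', roughly (0.0, 0.0, 0.667, 1.0)
```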

mglearn/plot_knn_regression.py

+8 -5
@@ -4,7 +4,8 @@
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.metrics import euclidean_distances
 
-from mglearn.datasets import make_wave
+from .datasets import make_wave
+from .plot_helpers import cm3
 
 
 def plot_knn_regression(n_neighbors=1):
@@ -24,12 +25,14 @@ def plot_knn_regression(n_neighbors=1):
         plt.arrow(x[0], y_, X[neighbor, 0] - x[0], y[neighbor] - y_,
                   head_width=0, fc='k', ec='k')
 
-    train, = plt.plot(X, y, 'o')
-    test, = plt.plot(X_test, -3 * np.ones(len(X_test)), '*', c='g', markersize=20)
-    pred, = plt.plot(X_test, y_pred, '*', c='b', markersize=20)
+    train, = plt.plot(X, y, 'o', c=cm3(0))
+    test, = plt.plot(X_test, -3 * np.ones(len(X_test)), '*', c=cm3(2),
+                     markersize=20)
+    pred, = plt.plot(X_test, y_pred, '*', c=cm3(0), markersize=20)
     plt.vlines(X_test, -3.1, 3.1, linestyle="--")
     plt.legend([train, test, pred],
-               ["training data/target", "test data", "test prediction"], ncol=3, loc=(.1, 1.025))
+               ["training data/target", "test data", "test prediction"],
+               ncol=3, loc=(.1, 1.025))
     plt.ylim(-3.1, 3.1)
     plt.xlabel("Feature")
     plt.ylabel("Target")

mglearn/plot_scaling.py

+4 -2
@@ -1,7 +1,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.datasets import make_blobs
-from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, RobustScaler
+from sklearn.preprocessing import (StandardScaler, MinMaxScaler, Normalizer,
+                                   RobustScaler)
 from .plot_helpers import cm2
 
 
@@ -19,7 +20,8 @@ def plot_scaling():
     main_ax.set_xlim(-maxx + 1, maxx + 1)
     main_ax.set_ylim(-maxy + 1, maxy + 1)
     main_ax.set_title("Original Data")
-    other_axes = [plt.subplot2grid((2, 4), (i, j)) for j in range(2, 4) for i in range(2)]
+    other_axes = [plt.subplot2grid((2, 4), (i, j))
+                  for j in range(2, 4) for i in range(2)]
 
     for ax, scaler in zip(other_axes, [StandardScaler(), RobustScaler(),
                                        MinMaxScaler(), Normalizer(norm='l2')]):
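All four scalers in this figure share the same fit/transform interface; a minimal sketch, not part of this commit, with arbitrary data:

```python
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

X, _ = make_blobs(random_state=4)
X_scaled = StandardScaler().fit_transform(X)
print(X_scaled.mean(axis=0), X_scaled.std(axis=0))  # ~0 and ~1 per feature
```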

mglearn/tools.py

+35 -8
@@ -2,24 +2,48 @@
 from sklearn.datasets import make_blobs
 from sklearn.tree import export_graphviz
 import matplotlib.pyplot as plt
-from .plot_2d_separator import plot_2d_separator, plot_2d_classification, plot_2d_scores
+from .plot_2d_separator import (plot_2d_separator, plot_2d_classification,
+                                plot_2d_scores)
 from .plot_helpers import cm2 as cm, discrete_scatter
 
 
 def visualize_coefficients(coefficients, feature_names, n_top_features=25):
+    """Visualize coefficients of a linear model.
+
+    Parameters
+    ----------
+    coefficients : nd-array, shape (n_features,)
+        Model coefficients.
+
+    feature_names : list or nd-array of strings, shape (n_features,)
+        Feature names for labeling the coefficients.
+
+    n_top_features : int, default=25
+        How many features to show. The function will show the largest (most
+        positive) and smallest (most negative) n_top_features coefficients,
+        for a total of 2 * n_top_features coefficients.
+    """
+    if len(coefficients) != len(feature_names):
+        raise ValueError("Number of coefficients {} doesn't match number of"
+                         "feature names {}.".format(len(coefficients),
+                                                    len(feature_names)))
     # get coefficients with large absolute values
     coef = coefficients.ravel()
     positive_coefficients = np.argsort(coef)[-n_top_features:]
     negative_coefficients = np.argsort(coef)[:n_top_features]
-    interesting_coefficients = np.hstack([negative_coefficients, positive_coefficients])
+    interesting_coefficients = np.hstack([negative_coefficients,
+                                          positive_coefficients])
     # plot them
     plt.figure(figsize=(15, 5))
-    colors = [cm(1) if c < 0 else cm(0) for c in coef[interesting_coefficients]]
-    plt.bar(np.arange(2 * n_top_features), coef[interesting_coefficients], color=colors)
+    colors = [cm(1) if c < 0 else cm(0)
+              for c in coef[interesting_coefficients]]
+    plt.bar(np.arange(2 * n_top_features), coef[interesting_coefficients],
+            color=colors)
     feature_names = np.array(feature_names)
     plt.subplots_adjust(bottom=0.3)
     plt.xticks(np.arange(1, 1 + 2 * n_top_features),
-               feature_names[interesting_coefficients], rotation=60, ha="right")
+               feature_names[interesting_coefficients], rotation=60,
+               ha="right")
     plt.ylabel("Coefficient magnitude")
     plt.xlabel("Feature")
 
@@ -39,7 +63,8 @@ def heatmap(values, xlabel, ylabel, xticklabels, yticklabels, cmap=None,
     ax.set_yticklabels(yticklabels)
     ax.set_aspect(1)
 
-    for p, color, value in zip(img.get_paths(), img.get_facecolors(), img.get_array()):
+    for p, color, value in zip(img.get_paths(), img.get_facecolors(),
+                               img.get_array()):
         x, y = p.vertices[:-2, :].mean(0)
         if np.mean(color[:3]) > 0.5:
             c = 'k'
@@ -59,7 +84,8 @@ def make_handcrafted_dataset():
     return X, y
 
 
-def print_topics(topics, feature_names, sorting, topics_per_chunk=6, n_words=20):
+def print_topics(topics, feature_names, sorting, topics_per_chunk=6,
+                 n_words=20):
     for i in range(0, len(topics), topics_per_chunk):
         # for each chunk:
         these_topics = topics[i: i + topics_per_chunk]
@@ -71,7 +97,8 @@ def print_topics(topics, feature_names, sorting, topics_per_chunk=6, n_words=20)
         # print top n_words frequent words
         for i in range(n_words):
             try:
-                print(("{:<14}" * len_this_chunk).format(*feature_names[sorting[these_topics, i]]))
+                print(("{:<14}" * len_this_chunk).format(
+                    *feature_names[sorting[these_topics, i]]))
             except:
                 pass
             print("\n")
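With the new docstring and length check in place, a usage sketch for visualize_coefficients — not part of this commit; the coefficients and names are synthetic:

```python
import numpy as np
from mglearn.tools import visualize_coefficients

rng = np.random.RandomState(0)
coefficients = rng.normal(size=50)                      # stand-in for model.coef_
feature_names = ["feature_%d" % i for i in range(50)]   # must match in length
visualize_coefficients(coefficients, feature_names, n_top_features=10)
```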
