Commit ea60cf6

get updates from mglearn repo

1 parent 842e623, commit ea60cf6

10 files changed: +39 / -65 lines

mglearn/__init__.py (+2)

@@ -4,4 +4,6 @@
 from .tools import discrete_scatter
 from .plot_helpers import ReBl
 
+__version__ = "0.2.0"
+
 __all__ = ['tools', 'plots', 'cm3', 'cm2', 'discrete_scatter', 'ReBl']

mglearn/datasets.py (+16 -3)

@@ -2,18 +2,18 @@
 import pandas as pd
 import os
 from scipy import signal
-from sklearn.datasets import load_boston
 from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
 from sklearn.datasets import make_blobs
+from sklearn.utils import Bunch
 
-DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "data")
+DATA_PATH = os.path.join(os.path.dirname(__file__), "data")
 
 
 def make_forge():
     # a carefully hand-designed dataset lol
     X, y = make_blobs(centers=2, random_state=4, n_samples=30)
     y[np.array([7, 27])] = 0
-    mask = np.ones(len(X), dtype=np.bool)
+    mask = np.ones(len(X), dtype=bool)
     mask[np.array([0, 1, 5, 26])] = 0
     X, y = X[mask], y[mask]
     return X, y

@@ -27,6 +27,19 @@ def make_wave(n_samples=100):
     return x.reshape(-1, 1), y
 
 
+def load_boston():
+    try:
+        from sklearn.datasets import load_boston
+        return load_boston()
+    except ImportError:
+        pass
+    data_url = "http://lib.stat.cmu.edu/datasets/boston"
+    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
+    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
+    target = raw_df.values[1::2, 2]
+    return Bunch(data=data, target=target)
+
+
 def load_extended_boston():
     boston = load_boston()
     X = boston.data
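
The new load_boston shim exists because scikit-learn deprecated the Boston housing loader in 1.0 and removed it in 1.2: when the import fails, the data is parsed straight from the StatLib source. The parsing relies on the file's layout, where each of the 506 records spans two physical lines (the first 11 feature values, then 2 more features plus the MEDV target). A standalone sketch of that step, with shape checks added here for illustration:

    import numpy as np
    import pandas as pd

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    # skiprows=22 skips the free-text header of the StatLib file
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

    # Even rows hold the first 11 features of each record; odd rows hold
    # the remaining 2 features followed by the MEDV target.
    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    target = raw_df.values[1::2, 2]

    assert data.shape == (506, 13)
    assert target.shape == (506,)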

mglearn/plot_2d_separator.py (+5 -45)

@@ -2,34 +2,6 @@
 import matplotlib.pyplot as plt
 from .plot_helpers import cm2, cm3, discrete_scatter
 
-def _call_classifier_chunked(classifier_pred_or_decide, X):
-    # The chunk_size is used to chunk the large arrays to work with x86
-    # memory models that are restricted to < 2 GB in memory allocation. The
-    # chunk_size value used here is based on a measurement with the
-    # MLPClassifier using the following parameters:
-    # MLPClassifier(solver='lbfgs', random_state=0,
-    #               hidden_layer_sizes=[1000, 1000, 1000])
-    # by reducing the value it is possible to trade in time for memory.
-    # It is possible to chunk the array as the calculations are independent of
-    # each other.
-    # Note: an intermittent version made a distinction between
-    # 32- and 64 bit architectures avoiding the chunking. Testing revealed
-    # that even on 64 bit architectures the chunking increases the
-    # performance by a factor of 3-5, largely due to the avoidance of memory
-    # swapping.
-    chunk_size = 10000
-
-    # We use a list to collect all result chunks
-    Y_result_chunks = []
-
-    # Call the classifier in chunks.
-    for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
-                                               chunk_size, dtype=np.int32),
-                                  axis=0):
-        Y_result_chunks.append(classifier_pred_or_decide(x_chunk))
-
-    return np.concatenate(Y_result_chunks)
-
 
 def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
                            alpha=1, cm=cm3):

@@ -110,16 +82,14 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
 
     X1, X2 = np.meshgrid(xx, yy)
     X_grid = np.c_[X1.ravel(), X2.ravel()]
-    if hasattr(classifier, "decision_function"):
-        decision_values = _call_classifier_chunked(classifier.decision_function,
-                                                   X_grid)
+    try:
+        decision_values = classifier.decision_function(X_grid)
         levels = [0] if threshold is None else [threshold]
         fill_levels = [decision_values.min()] + levels + [
             decision_values.max()]
-    else:
+    except AttributeError:
         # no decision_function
-        decision_values = _call_classifier_chunked(classifier.predict_proba,
-                                                   X_grid)[:, 1]
+        decision_values = classifier.predict_proba(X_grid)[:, 1]
         levels = [.5] if threshold is None else [threshold]
         fill_levels = [0] + levels + [1]
     if fill:

@@ -133,14 +103,4 @@
     ax.set_xlim(x_min, x_max)
     ax.set_ylim(y_min, y_max)
     ax.set_xticks(())
-    ax.set_yticks(())
-
-
-if __name__ == '__main__':
-    from sklearn.datasets import make_blobs
-    from sklearn.linear_model import LogisticRegression
-    X, y = make_blobs(centers=2, random_state=42)
-    clf = LogisticRegression(solver='lbfgs').fit(X, y)
-    plot_2d_separator(clf, X, fill=True)
-    discrete_scatter(X[:, 0], X[:, 1], y)
-    plt.show()
+    ax.set_yticks(())
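
Two simplifications land here at once: the _call_classifier_chunked helper is dropped in favor of calling the classifier directly on the full grid, and the hasattr check becomes a try/except, so a missing decision_function routes control to the predict_proba branch. A minimal sketch of that dispatch, using a hypothetical helper name scores_for_grid:

    import numpy as np
    from sklearn.datasets import make_blobs
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC

    def scores_for_grid(classifier, X_grid):
        # Same dispatch as above: prefer decision_function, fall back
        # to predict_proba when the attribute does not exist.
        try:
            return classifier.decision_function(X_grid)
        except AttributeError:
            return classifier.predict_proba(X_grid)[:, 1]

    X, y = make_blobs(centers=2, random_state=42)
    print(scores_for_grid(SVC().fit(X, y), X)[:3])                     # decision_function path
    print(scores_for_grid(RandomForestClassifier().fit(X, y), X)[:3])  # predict_proba path

One trade-off of the try/except form: an AttributeError raised inside decision_function itself would also be caught and silently sent down the predict_proba branch.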

mglearn/plot_grid_search.py (+1 -2)

@@ -33,9 +33,8 @@ def plot_cross_val_selection():
     marker_best, = plt.plot(i, row.mean_test_score, 'o', c='red',
                             fillstyle="none", alpha=1, markersize=20,
                             markeredgewidth=3)
-
     plt.xticks(range(len(results)), [str(x).strip("{}").replace("'", "") for x
-                                     in grid_search.cv_results_['params']],
+                                     in results['params']],
                rotation=90)
     plt.ylabel("Validation accuracy")
     plt.xlabel("Parameter settings")

mglearn/plot_kneighbors_regularization.py (+1 -6)

@@ -22,9 +22,4 @@ def plot_kneighbors_regularization():
         ax.plot(x_test, kneighbor_regression.predict(x_test[:, np.newaxis]),
                 label="prediction")
         ax.legend()
-        ax.set_title("n_neighbors = %d" % n_neighbors)
-
-
-if __name__ == "__main__":
-    plot_kneighbors_regularization()
-    plt.show()
+        ax.set_title("n_neighbors = %d" % n_neighbors)

mglearn/plot_linear_svc_regularization.py (-4)

@@ -31,7 +31,3 @@ def plot_linear_svc_regularization():
         ax.set_yticks(())
         ax.set_title("C = %f" % C)
     axes[0].legend(loc="best")
-
-if __name__ == "__main__":
-    plot_linear_svc_regularization()
-    plt.show()

mglearn/plot_nmf.py (+5 -1)

@@ -4,7 +4,11 @@
 
 from joblib import Memory
 
-memory = Memory(location="cache")
+try:
+    memory = Memory(cachedir="cache")
+except TypeError:
+    # joblib.Memory changed its API in 0.12
+    memory = Memory(location="cache", verbose=0)
 
 
 def plot_nmf_illustration():
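
plot_pca.py below receives the same shim: joblib renamed Memory's cachedir argument to location in 0.12 and later removed the old keyword, which makes Memory(cachedir=...) raise TypeError on current releases. Trying the old spelling first keeps the module importable under both APIs. A sketch of how the resulting memory object is used, with an illustrative expensive function:

    from joblib import Memory

    try:
        memory = Memory(cachedir="cache")             # older joblib
    except TypeError:
        # the keyword was removed; use the modern API
        memory = Memory(location="cache", verbose=0)

    @memory.cache
    def expensive(n):
        # recomputed only for unseen arguments; results persist in ./cache
        return sum(i * i for i in range(n))

    print(expensive(10_000))  # computed and written to the cache
    print(expensive(10_000))  # read back from the on-disk cache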

mglearn/plot_pca.py (+5 -2)

@@ -4,8 +4,11 @@
 
 from joblib import Memory
 
-memory = Memory(location="cache")
-
+try:
+    memory = Memory(cachedir="cache")
+except TypeError:
+    # joblib.Memory changed its API in 0.12
+    memory = Memory(location="cache", verbose=0)
 
 def plot_pca_illustration():
     rnd = np.random.RandomState(5)

mglearn/plots.py (+3 -1)

@@ -28,6 +28,7 @@
                              plot_decision_threshold)
 from .plot_dbscan import plot_dbscan
 from .plot_ridge import plot_ridge_n_samples
+from .plot_kneighbors_regularization import plot_kneighbors_regularization
 
 __all__ = ['plot_linear_svc_regularization',
            "plot_animal_tree", "plot_tree_progressive",

@@ -65,5 +66,6 @@
            'plot_binary_confusion_matrix',
            'plot_decision_threshold',
            'plot_dbscan',
-           'plot_ridge_n_samples'
+           'plot_ridge_n_samples',
+           'plot_kneighbors_regularization'
            ]

mglearn/tools.py (+1 -1)

@@ -85,7 +85,7 @@ def make_handcrafted_dataset():
     # a carefully hand-designed dataset lol
     X, y = make_blobs(centers=2, random_state=4, n_samples=30)
     y[np.array([7, 27])] = 0
-    mask = np.ones(len(X), dtype=np.bool)
+    mask = np.ones(len(X), dtype=bool)
     mask[np.array([0, 1, 5, 26])] = 0
     X, y = X[mask], y[mask]
     return X, y
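
The dtype change here mirrors the one in datasets.py: np.bool was only an alias for the builtin bool, deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError. The builtin spelling is portable:

    import numpy as np

    mask = np.ones(30, dtype=bool)         # works on all NumPy versions
    mask[np.array([0, 1, 5, 26])] = False  # equivalent to assigning 0
    print(int(mask.sum()))                 # 26 points remain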
