Skip to content

Commit bb34a1f

Browse files
committed
This closes issue amueller#67 memory error on 32bit Python
Minor refinements due to PR feedback - Removed unnecessary variables and inlined a variable that was used only once. - Re-introduced the originally intended solver='lbfgs' - Adhered to PEP8, breaking lines and comments accordingly
1 parent c194b89 commit bb34a1f

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

mglearn/plot_2d_separator.py

+20-18
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,30 @@
33
from .plot_helpers import cm2, cm3, discrete_scatter
44

55
def _call_classifier_chunked(classifier_pred_or_decide, X):
6-
7-
8-
# The chunk_size is used to chunk the large arrays to work with x86 memory
9-
# models that are restricted to < 2 GB in memory allocation.
10-
# The chunk_size value used here is based on a measurement with the MLPClassifier
11-
# using the following parameters:
12-
# MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[1000,1000,1000])
6+
# The chunk_size is used to chunk the large arrays to work with x86
7+
# memory models that are restricted to < 2 GB in memory allocation. The
8+
# chunk_size value used here is based on a measurement with the
9+
# MLPClassifier using the following parameters:
10+
# MLPClassifier(solver='lbfgs', random_state=0,
11+
# hidden_layer_sizes=[1000,1000,1000])
1312
# By reducing the value it is possible to trade time for memory.
14-
# It is possible to chunk the array as the calculations are independent of each other.
15-
# Note: an intermittent version made a distinction between 32- and 64 bit architectures
16-
# avoiding the chunking. Testing revealed that even on 64 bit architectures the chunking
17-
# increases the performance by a factor of 3-5, largely due to the avoidance of memory
13+
# It is possible to chunk the array as the calculations are independent of
14+
# each other.
15+
# Note: an intermediate version made a distinction between
16+
# 32- and 64 bit architectures avoiding the chunking. Testing revealed
17+
# that even on 64 bit architectures the chunking increases the
18+
# performance by a factor of 3-5, largely due to the avoidance of memory
1819
# swapping.
1920
chunk_size = 10000
20-
X_axis0_size = X.shape[0]
2121

2222
# We use a list to collect all result chunks
2323
Y_result_chunks = []
2424

2525
# Call the classifier in chunks.
26-
y_chunk_pos = 0
27-
for x_chunk in np.array_split(X, np.arange(chunk_size,X_axis0_size,chunk_size,dtype=np.int32), axis=0):
26+
for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
27+
chunk_size, dtype=np.int32),
28+
axis=0):
2829
Y_result_chunks.append(classifier_pred_or_decide(x_chunk))
29-
y_chunk_pos += x_chunk.shape[0]
3030

3131
return np.concatenate(Y_result_chunks)
3232

@@ -111,13 +111,15 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
111111
X1, X2 = np.meshgrid(xx, yy)
112112
X_grid = np.c_[X1.ravel(), X2.ravel()]
113113
if hasattr(classifier, "decision_function"):
114-
decision_values = _call_classifier_chunked(classifier.decision_function, X_grid)
114+
decision_values = _call_classifier_chunked(classifier.decision_function,
115+
X_grid)
115116
levels = [0] if threshold is None else [threshold]
116117
fill_levels = [decision_values.min()] + levels + [
117118
decision_values.max()]
118119
else:
119120
# no decision_function
120-
decision_values = _call_classifier_chunked(classifier.predict_proba, X_grid)[:, 1]
121+
decision_values = _call_classifier_chunked(classifier.predict_proba,
122+
X_grid)[:, 1]
121123
levels = [.5] if threshold is None else [threshold]
122124
fill_levels = [0] + levels + [1]
123125
if fill:
@@ -138,7 +140,7 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
138140
from sklearn.datasets import make_blobs
139141
from sklearn.linear_model import LogisticRegression
140142
X, y = make_blobs(centers=2, random_state=42)
141-
clf = LogisticRegression().fit(X, y)
143+
clf = LogisticRegression(solver='lbfgs').fit(X, y)
142144
plot_2d_separator(clf, X, fill=True)
143145
discrete_scatter(X[:, 0], X[:, 1], y)
144146
plt.show()

0 commit comments

Comments
 (0)