Skip to content

Commit c194b89

Browse files
committed
This closes issue amueller#67 memory error on 32bit Python
Main change: - Added chunking logic that calls the classifier with at most a fixed number of test points per call (see the code comment for a detailed description). In addition, the following change was made: - Replaced the try/except with an explicit check for the available method (decision_function, falling back to predict_proba).
1 parent 52da2a9 commit c194b89

File tree

1 file changed

+32
-4
lines changed

1 file changed

+32
-4
lines changed

mglearn/plot_2d_separator.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,34 @@
22
import matplotlib.pyplot as plt
33
from .plot_helpers import cm2, cm3, discrete_scatter
44

5+
def _call_classifier_chunked(classifier_pred_or_decide, X, chunk_size=10000):
    """Call a classifier method on ``X`` in row chunks and join the results.

    Parameters
    ----------
    classifier_pred_or_decide : callable
        Function applied to each chunk of rows, e.g. a bound
        ``decision_function`` or ``predict_proba``. It must return an array
        whose first axis matches the rows of the chunk it was given.
    X : ndarray
        Input array; it is split along axis 0.
    chunk_size : int, default=10000
        Maximum number of rows handed to ``classifier_pred_or_decide`` per
        call. Smaller values trade time for memory.

    Returns
    -------
    ndarray
        The per-chunk results concatenated along axis 0, in input order.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    # Chunking keeps large arrays workable on x86 memory models that are
    # restricted to < 2 GB in memory allocation.
    # The default chunk_size is based on a measurement with the MLPClassifier
    # using the following parameters:
    # MLPClassifier(solver='lbfgs', random_state=0,
    #               hidden_layer_sizes=[1000,1000,1000])
    # Chunking is possible because the calculations for individual rows are
    # independent of each other.
    # Note: an intermediate version distinguished between 32- and 64-bit
    # architectures and skipped the chunking on 64 bit. Testing revealed that
    # even on 64-bit architectures the chunking increases the performance by
    # a factor of 3-5, largely due to the avoidance of memory swapping.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Split points at every multiple of chunk_size below the row count. With
    # no split points (X has at most chunk_size rows) array_split yields X as
    # a single chunk, so the callable is always invoked at least once.
    split_points = np.arange(chunk_size, X.shape[0], chunk_size)

    return np.concatenate([
        classifier_pred_or_decide(x_chunk)
        for x_chunk in np.array_split(X, split_points, axis=0)
    ])
32+
533

634
def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
735
alpha=1, cm=cm3):
@@ -82,14 +110,14 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
82110

83111
X1, X2 = np.meshgrid(xx, yy)
84112
X_grid = np.c_[X1.ravel(), X2.ravel()]
85-
try:
86-
decision_values = classifier.decision_function(X_grid)
113+
if hasattr(classifier, "decision_function"):
114+
decision_values = _call_classifier_chunked(classifier.decision_function, X_grid)
87115
levels = [0] if threshold is None else [threshold]
88116
fill_levels = [decision_values.min()] + levels + [
89117
decision_values.max()]
90-
except AttributeError:
118+
else:
91119
# no decision_function
92-
decision_values = classifier.predict_proba(X_grid)[:, 1]
120+
decision_values = _call_classifier_chunked(classifier.predict_proba, X_grid)[:, 1]
93121
levels = [.5] if threshold is None else [threshold]
94122
fill_levels = [0] + levels + [1]
95123
if fill:

0 commit comments

Comments
 (0)