From b86af34b5cdd7ce52fc976d05b06590fc8a8d8af Mon Sep 17 00:00:00 2001 From: behreth Date: Sun, 9 Dec 2018 23:21:06 +0100 Subject: [PATCH] This closes issue amueller/introduction_to_ml_with_python#67 memory error on 32-bit Python Main change: - Created chunking logic to call the classifier with a maximum number of samples per call (detailed description as code comment). In addition the following changes were made: - Replaced the try/catch with an explicit check for the available function, either decision_function or predict_proba. - Explicitly defined the solver in the example due to changes in the interface, as per recommendation. --- mglearn/plot_2d_separator.py | 43 +++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/mglearn/plot_2d_separator.py b/mglearn/plot_2d_separator.py index 9b32028..495d206 100644 --- a/mglearn/plot_2d_separator.py +++ b/mglearn/plot_2d_separator.py @@ -2,6 +2,39 @@ import matplotlib.pyplot as plt from .plot_helpers import cm2, cm3, discrete_scatter +def _call_classifier_chunked(classifier_pred_or_decide, X): + + + # The chunk_size is used to chunk the large arrays to work with x86 memory + # models that are restricted to < 2 GB in memory allocation. + # The chunk_size value used here is based on a measurement with the MLPClassifier + # using the following parameters: + # MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[1000,1000,1000]) + # By reducing the value it is possible to trade time for memory. + # It is possible to chunk the array as the calculations are independent of each other. + # Note: an intermediate version made a distinction between 32- and 64-bit architectures, + # avoiding the chunking. Testing revealed that even on 64-bit architectures the chunking + # increases the performance by a factor of 3-5, probably due to the avoidance of memory + # swapping. 
+ chunk_size = 10000 + X_axis0_size = X.shape[0] + + # Pre-allocate the entire result set to avoid array copying for efficiency. + # As we do not know the shape of the output, which depends on whether the + # decision function or the predict probability is called, we simply test the output + # size for a single sample and use the shape of that output. + Y_result = np.empty((X_axis0_size,) + classifier_pred_or_decide(X[0:1]).shape[1:]) + + # Call the classifier in chunks. + y_chunk_pos = 0 + for x_chunk in np.array_split(X, np.arange(chunk_size,X_axis0_size,chunk_size,dtype=np.int32), axis=0): + Y_result[y_chunk_pos:y_chunk_pos + x_chunk.shape[0]] = classifier_pred_or_decide(x_chunk) + + y_chunk_pos += x_chunk.shape[0] + + return Y_result + + def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None, alpha=1, cm=cm3): @@ -82,14 +115,14 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1, X1, X2 = np.meshgrid(xx, yy) X_grid = np.c_[X1.ravel(), X2.ravel()] - try: - decision_values = classifier.decision_function(X_grid) + if hasattr(classifier, "decision_function"): + decision_values = _call_classifier_chunked(classifier.decision_function, X_grid) levels = [0] if threshold is None else [threshold] fill_levels = [decision_values.min()] + levels + [ decision_values.max()] - except AttributeError: + else: # no decision_function - decision_values = classifier.predict_proba(X_grid)[:, 1] + decision_values = _call_classifier_chunked(classifier.predict_proba, X_grid)[:, 1] levels = [.5] if threshold is None else [threshold] fill_levels = [0] + levels + [1] if fill: @@ -110,7 +143,7 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1, from sklearn.datasets import make_blobs from sklearn.linear_model import LogisticRegression X, y = make_blobs(centers=2, random_state=42) - clf = LogisticRegression().fit(X, y) + clf = LogisticRegression(solver = 'liblinear').fit(X, y) plot_2d_separator(clf, X, fill=True) 
discrete_scatter(X[:, 0], X[:, 1], y) plt.show()