|
2 | 2 | import matplotlib.pyplot as plt
|
3 | 3 | from .plot_helpers import cm2, cm3, discrete_scatter
|
4 | 4 |
|
def _call_classifier_chunked(classifier_pred_or_decide, X, chunk_size=10000):
    """Apply a prediction callable to ``X`` in row chunks and join the results.

    Parameters
    ----------
    classifier_pred_or_decide : callable
        Called once per chunk with a slice of ``X`` taken along axis 0
        (e.g. a bound ``decision_function`` or ``predict_proba``). It must
        return an array for each chunk.
    X : ndarray
        Input whose first axis is split into chunks.
    chunk_size : int, default=10000
        Maximum number of rows passed to the callable per call. Lowering it
        trades time for memory.

    Returns
    -------
    ndarray
        The per-chunk results joined with ``np.concatenate`` in chunk order.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    # Chunking keeps the intermediate arrays small enough for x86 memory
    # models that are restricted to < 2 GB of memory allocation.
    # The default chunk_size is based on a measurement with
    #   MLPClassifier(solver='lbfgs', random_state=0,
    #                 hidden_layer_sizes=[1000, 1000, 1000])
    # Chunking is valid because the calculations for the individual rows are
    # independent of each other.
    # Note: an intermediate version only chunked on 32-bit architectures.
    # Testing revealed that even on 64-bit architectures chunking improves
    # performance by a factor of 3-5, largely by avoiding memory swapping.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    n_rows = X.shape[0]
    # Interior split positions; empty when X fits into a single chunk, in
    # which case the callable is invoked exactly once on all of X.
    split_points = list(range(chunk_size, n_rows, chunk_size))

    return np.concatenate([
        classifier_pred_or_decide(x_chunk)
        for x_chunk in np.array_split(X, split_points, axis=0)
    ])
| 32 | + |
5 | 33 |
|
6 | 34 | def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
|
7 | 35 | alpha=1, cm=cm3):
|
@@ -82,14 +110,14 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
|
82 | 110 |
|
83 | 111 | X1, X2 = np.meshgrid(xx, yy)
|
84 | 112 | X_grid = np.c_[X1.ravel(), X2.ravel()]
|
85 |
| - try: |
86 |
| - decision_values = classifier.decision_function(X_grid) |
| 113 | + if hasattr(classifier, "decision_function"): |
| 114 | + decision_values = _call_classifier_chunked(classifier.decision_function, X_grid) |
87 | 115 | levels = [0] if threshold is None else [threshold]
|
88 | 116 | fill_levels = [decision_values.min()] + levels + [
|
89 | 117 | decision_values.max()]
|
90 |
| - except AttributeError: |
| 118 | + else: |
91 | 119 | # no decision_function
|
92 |
| - decision_values = classifier.predict_proba(X_grid)[:, 1] |
| 120 | + decision_values = _call_classifier_chunked(classifier.predict_proba, X_grid)[:, 1] |
93 | 121 | levels = [.5] if threshold is None else [threshold]
|
94 | 122 | fill_levels = [0] + levels + [1]
|
95 | 123 | if fill:
|
|
0 commit comments