2
2
import matplotlib .pyplot as plt
3
3
from .plot_helpers import cm2 , cm3 , discrete_scatter
4
4
5
def _call_classifier_chunked(classifier_pred_or_decide, X, chunk_size=10000):
    """Evaluate a classifier callable on ``X`` in row chunks.

    ``X`` is cut along its first axis into consecutive slices of at most
    ``chunk_size`` rows, the callable is invoked once per slice, and the
    partial results are concatenated in order. This is valid because the
    calculations for different rows are independent of each other.

    Chunking keeps the size of intermediate allocations bounded so that
    large grids also work with x86 memory models that are restricted to
    < 2 GB per allocation. Note: an intermediate version distinguished
    between 32- and 64-bit architectures and skipped the chunking on the
    latter. Testing revealed that even on 64-bit architectures chunking
    increases performance by a factor of 3-5, largely because memory
    swapping is avoided.

    Parameters
    ----------
    classifier_pred_or_decide : callable
        Callable applied to each chunk, e.g. a classifier's
        ``decision_function`` or ``predict_proba``. It must return an
        array that can be concatenated along the first axis.
    X : ndarray
        Input samples; chunks are taken along axis 0.
    chunk_size : int, default=10000
        Maximum number of rows passed to the callable at once. The default
        is based on a measurement with
        ``MLPClassifier(solver='lbfgs', random_state=0,
        hidden_layer_sizes=[1000, 1000, 1000])``. Reducing the value
        trades time for memory.

    Returns
    -------
    ndarray
        The per-chunk results concatenated along the first axis.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(
            "chunk_size must be a positive integer, got %r" % (chunk_size,))

    n_rows = X.shape[0]

    # Plain Python integers are used for the slice bounds, so there is no
    # fixed-width integer dtype that could overflow for very large inputs.
    # max(n_rows, 1) guarantees one (empty) chunk for an empty X, so the
    # callable is still invoked once and concatenate gets a non-empty list.
    chunk_starts = range(0, max(n_rows, 1), chunk_size)

    # Basic slicing yields views, so no copy of X is made per chunk.
    Y_result_chunks = [
        classifier_pred_or_decide(X[start:start + chunk_size])
        for start in chunk_starts
    ]

    return np.concatenate(Y_result_chunks)
32
+
5
33
6
34
def plot_2d_classification (classifier , X , fill = False , ax = None , eps = None ,
7
35
alpha = 1 , cm = cm3 ):
@@ -82,14 +110,16 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
82
110
83
111
X1 , X2 = np .meshgrid (xx , yy )
84
112
X_grid = np .c_ [X1 .ravel (), X2 .ravel ()]
85
- try :
86
- decision_values = classifier .decision_function (X_grid )
113
+ if hasattr (classifier , "decision_function" ):
114
+ decision_values = _call_classifier_chunked (classifier .decision_function ,
115
+ X_grid )
87
116
levels = [0 ] if threshold is None else [threshold ]
88
117
fill_levels = [decision_values .min ()] + levels + [
89
118
decision_values .max ()]
90
- except AttributeError :
119
+ else :
91
120
# no decision_function
92
- decision_values = classifier .predict_proba (X_grid )[:, 1 ]
121
+ decision_values = _call_classifier_chunked (classifier .predict_proba ,
122
+ X_grid )[:, 1 ]
93
123
levels = [.5 ] if threshold is None else [threshold ]
94
124
fill_levels = [0 ] + levels + [1 ]
95
125
if fill :
@@ -110,7 +140,7 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
110
140
from sklearn .datasets import make_blobs
111
141
from sklearn .linear_model import LogisticRegression
112
142
X , y = make_blobs (centers = 2 , random_state = 42 )
113
- clf = LogisticRegression ().fit (X , y )
143
+ clf = LogisticRegression (solver = 'lbfgs' ).fit (X , y )
114
144
plot_2d_separator (clf , X , fill = True )
115
145
discrete_scatter (X [:, 0 ], X [:, 1 ], y )
116
146
plt .show ()
0 commit comments