from .plot_helpers import cm2, cm3, discrete_scatter


def _call_classifier_chunked(classifier_pred_or_decide, X):
-
-
-    # The chunk_size is used to chunk the large arrays to work with x86 memory
-    # models that are restricted to < 2 GB in memory allocation.
-    # The chunk_size value used here is based on a measurement with the MLPClassifier
-    # using the following parameters:
-    # MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[1000,1000,1000])
+    # The chunk_size is used to chunk the large arrays to work with x86
+    # memory models that are restricted to < 2 GB in memory allocation. The
+    # chunk_size value used here is based on a measurement with the
+    # MLPClassifier using the following parameters:
+    # MLPClassifier(solver='lbfgs', random_state=0,
+    #               hidden_layer_sizes=[1000,1000,1000])
    # by reducing the value it is possible to trade in time for memory.
-    # It is possible to chunk the array as the calculations are independent of each other.
-    # Note: an intermittent version made a distinction between 32- and 64 bit architectures
-    # avoiding the chunking. Testing revealed that even on 64 bit architectures the chunking
-    # increases the performance by a factor of 3-5, largely due to the avoidance of memory
+    # It is possible to chunk the array as the calculations are independent of
+    # each other.
+    # Note: an interim version distinguished between 32- and 64-bit
+    # architectures and skipped the chunking on 64-bit systems. Testing
+    # revealed that even on 64-bit architectures the chunking increases
+    # performance by a factor of 3-5, largely due to the avoidance of memory
    # swapping.
    chunk_size = 10000
-    X_axis0_size = X.shape[0]

    # We use a list to collect all result chunks
    Y_result_chunks = []

    # Call the classifier in chunks.
-    y_chunk_pos = 0
-    for x_chunk in np.array_split(X, np.arange(chunk_size,X_axis0_size,chunk_size,dtype=np.int32), axis=0):
+    for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
+                                               chunk_size, dtype=np.int32),
+                                  axis=0):
        Y_result_chunks.append(classifier_pred_or_decide(x_chunk))
-        y_chunk_pos += x_chunk.shape[0]

    return np.concatenate(Y_result_chunks)

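For reference, a minimal standalone sketch (not part of the commit; the tiny chunk_size and the toy predict function are purely illustrative) of what the rewritten loop does: np.array_split cuts X at multiples of chunk_size, the classifier is applied chunk by chunk, and the per-chunk results are concatenated, giving the same output as a single call on the full array.

import numpy as np

chunk_size = 3                         # tiny value, for illustration only
X = np.arange(10, dtype=float).reshape(10, 1)

def predict(chunk):
    # stand-in for classifier_pred_or_decide (e.g. predict or decision_function)
    return chunk.ravel() * 2

# Split indices at 3, 6, 9 -> chunks of 3, 3, 3 and 1 rows.
splits = np.arange(chunk_size, X.shape[0], chunk_size, dtype=np.int32)
chunks = np.array_split(X, splits, axis=0)
result = np.concatenate([predict(c) for c in chunks])
assert np.array_equal(result, predict(X))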
@@ -111,13 +111,15 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    if hasattr(classifier, "decision_function"):
-        decision_values = _call_classifier_chunked(classifier.decision_function, X_grid)
+        decision_values = _call_classifier_chunked(classifier.decision_function,
+                                                   X_grid)
        levels = [0] if threshold is None else [threshold]
        fill_levels = [decision_values.min()] + levels + [
            decision_values.max()]
    else:
        # no decision_function
-        decision_values = _call_classifier_chunked(classifier.predict_proba, X_grid)[:, 1]
+        decision_values = _call_classifier_chunked(classifier.predict_proba,
+                                                   X_grid)[:, 1]
        levels = [.5] if threshold is None else [threshold]
        fill_levels = [0] + levels + [1]
    if fill:
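A small side sketch (illustrative only, not part of the commit) of why the two branches use different default levels: with decision_function the boundary sits at level 0, with predict_proba at a class-1 probability of .5; for a logistic model these describe the same boundary.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression

X, y = make_blobs(centers=2, random_state=42)
clf = LogisticRegression(solver='lbfgs').fit(X, y)

scores = clf.decision_function(X)      # contour drawn at level 0
proba = clf.predict_proba(X)[:, 1]     # contour drawn at level .5
# The sign of the decision function and the .5 probability threshold
# pick out the same side of the boundary.
assert np.array_equal(scores > 0, proba > .5)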
@@ -138,7 +140,7 @@ def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression
    X, y = make_blobs(centers=2, random_state=42)
-    clf = LogisticRegression().fit(X, y)
+    clf = LogisticRegression(solver='lbfgs').fit(X, y)
    plot_2d_separator(clf, X, fill=True)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()
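A possible companion example (my own sketch, not part of the commit; it assumes the usual mglearn layout where plot_2d_separator is exposed via mglearn.plots and discrete_scatter at the package root) exercising the predict_proba fallback with a classifier that has no decision_function:

import matplotlib.pyplot as plt
import mglearn
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier

X, y = make_blobs(centers=2, random_state=42)
forest = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
# RandomForestClassifier has no decision_function, so plot_2d_separator
# falls back to predict_proba[:, 1] with a .5 threshold.
mglearn.plots.plot_2d_separator(forest, X, fill=True, alpha=.4)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.show()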