
Commit 77289e4

Fix parallel processing on Windows
1 parent 2e582d9 commit 77289e4
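Background for the fix: on Windows there is no fork, so joblib (which scikit-learn's n_jobs option uses under the hood) starts its workers by spawning fresh Python processes that re-import the script; any code sitting at module level then runs again in every worker, and a parallel cross_val_score call can fail or keep re-launching itself. Moving the whole body of p177_k_fold_cross_validation.py under an if __name__ == '__main__': guard, as this commit does, keeps the workers from re-executing it; the commit also changes n_jobs from a hard-coded 8 to -1 so joblib uses however many cores are available. A minimal sketch of the pattern, using the bundled digits dataset and the newer sklearn.model_selection import path rather than the repo's ocr_utils/E13B data:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

if __name__ == '__main__':
    # Runs only in the parent process; spawned workers import the module
    # without re-executing this block.
    X, y = load_digits(return_X_y=True)
    clf = LogisticRegression(max_iter=1000, random_state=1)
    # n_jobs=-1 uses all available cores; on Windows this is only safe
    # because the call sits under the __main__ guard.
    scores = cross_val_score(estimator=clf, X=X, y=y, cv=10, n_jobs=-1)
    print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))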

1 file changed (+144, -143 lines)

p177_k_fold_cross_validation.py

@@ -29,153 +29,154 @@
 from sklearn.cross_validation import StratifiedKFold
 from sklearn.lda import LDA
 
-#charsToTrain=range(48,58)
-chars_to_train = range(48,58)
-
-num_chars = 3000 #limit the number to speed up the calculation
-
-input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}
-
-# output the character label and the image and column sums
-output_feature_list = ['m_label','image']
-
-# read the complete image (20x20) = 400 pixels for each character
-ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
-                         output_feature_list=output_feature_list,
-                         random_state=0)
-
-y_train = ds.train.features[0][:num_chars]
-X_train = ds.train.features[1][:num_chars]
-
-# y_test = ds.test.features[0]-48
-# X_test = ds.test.features[1]
-# y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = charsToTrain , columns=range(0,20), nChars=1000, test_size=0.3,random_state=0)
-
-from sklearn.linear_model import LogisticRegression
-from sklearn.cross_validation import train_test_split
-
-X_train , X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=0)
-
-from sklearn.preprocessing import StandardScaler
-#
-# sc = StandardScaler()
-# X_train_std = sc.fit_transform(X_train)
-# X_test_std = sc.fit_transform(X_test)
-
-# X_train, X_test, y_train, y_test = \
-#     train_test_split(X, y, test_size=0.20, random_state=1)
-
-from sklearn.decomposition import PCA
-
-from sklearn.pipeline import Pipeline
-
-num_planes = range(2,12)
-
-pca_scores =[]
-pca_std_dev =[]
-for num_PCA in num_planes:
-    print ('number of Principal Components = {}'.format(num_PCA))
-    pipe_lr = Pipeline([('scl', StandardScaler()),
-                        ('pca', PCA(n_components=num_PCA)),
-                        ('clf', LogisticRegression(random_state=1))])
-
-    pipe_lr.fit(X_train, y_train)
-    print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
-
-
-
-    kfold = StratifiedKFold(y=y_train,
-                            n_folds=10,
-                            random_state=1)
-
-    scores = []
-    for k, (train, test) in enumerate(kfold):
-        pipe_lr.fit(X_train[train], y_train[train])
-        score = pipe_lr.score(X_train[test], y_train[test])
-        scores.append(score)
-        #print ('train {} samples: {}'.format(len(train), train))
-        #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score))
-
-    print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
-    from sklearn.cross_validation import cross_val_score
-
-    scores = cross_val_score(estimator=pipe_lr,
-                             X=X_train,
-                             y=y_train,
-                             cv=10,
-                             n_jobs=8)
-    print('CV accuracy scores: %s' % scores)
-    print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
-    pca_scores.append(np.mean(scores))
-    pca_std_dev.append(np.std(scores))
-
-plt.plot(num_planes, pca_scores, marker='o')
-plt.ylabel('Accuracy')
-plt.xlabel('number of Principal Components')
-title = 'Accuracy versus number of Principal Components'
-plt.title(title)
-plt.tight_layout()
-ocr_utils.show_figures(plt, title)
-
-plt.plot(num_planes, pca_std_dev, marker='o')
-plt.ylabel('Standard Deviation')
-plt.xlabel('number of Principal Components')
-title = 'Standard Deviation versus number of Principal Components'
-plt.title(title)
-plt.tight_layout()
-ocr_utils.show_figures(plt, title)
-
-pca_scores =[]
-pca_std_dev =[]
-for num_LDA in num_planes:
-    print ('number of Principal Components = {}'.format(num_LDA))
-    pipe_lr = Pipeline([('scl', StandardScaler()),
-                        ('lda', LDA(n_components=num_LDA)),
-                        ('clf', LogisticRegression(random_state=1))])
-
-    pipe_lr.fit(X_train, y_train)
-    print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
-
-
-    kfold = StratifiedKFold(y=y_train,
-                            n_folds=10,
-                            random_state=1)
-
-    scores = []
-    for k, (train, test) in enumerate(kfold):
-        pipe_lr.fit(X_train[train], y_train[train])
-        score = pipe_lr.score(X_train[test], y_train[test])
-        scores.append(score)
-        #print ('train {} samples: {}'.format(len(train), train))
-        #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score))
-
-    print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
-
-
-    scores = cross_val_score(estimator=pipe_lr,
-                             X=X_train,
-                             y=y_train,
-                             cv=10,
-                             n_jobs=8)
-    print('CV accuracy scores: %s' % scores)
-    print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
-    pca_scores.append(np.mean(scores))
-    pca_std_dev.append(np.std(scores))
-
-plt.plot(num_planes, pca_scores, marker='o')
-plt.ylabel('Accuracy')
-plt.xlabel('number of Linear Discriminants')
-title = 'Accuracy versus number of Linear Discriminants'
-plt.title(title)
-plt.tight_layout()
-ocr_utils.show_figures(plt, title)
-
-plt.plot(num_planes, pca_std_dev, marker='o')
-plt.ylabel('Standard Deviation')
-plt.xlabel('number of Linear Discriminants')
-title = 'Standard Deviation versus number of Linear Discriminants'
-plt.title(title)
-plt.tight_layout()
-ocr_utils.show_figures(plt, title)
-
-print ('\n########################### No Errors ####################################')
+if __name__ == '__main__':
+    #charsToTrain=range(48,58)
+    chars_to_train = range(48,58)
+
+    num_chars = 3000 #limit the number to speed up the calculation
+
+    input_filters_dict = {'m_label': chars_to_train, 'font': 'E13B'}
+
+    # output the character label and the image and column sums
+    output_feature_list = ['m_label','image']
+
+    # read the complete image (20x20) = 400 pixels for each character
+    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
+                             output_feature_list=output_feature_list,
+                             random_state=0)
+
+    y_train = ds.train.features[0][:num_chars]
+    X_train = ds.train.features[1][:num_chars]
+
+    # y_test = ds.test.features[0]-48
+    # X_test = ds.test.features[1]
+    # y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = charsToTrain , columns=range(0,20), nChars=1000, test_size=0.3,random_state=0)
+
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.cross_validation import train_test_split
+
+    X_train , X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=0)
+
+    from sklearn.preprocessing import StandardScaler
+    #
+    # sc = StandardScaler()
+    # X_train_std = sc.fit_transform(X_train)
+    # X_test_std = sc.fit_transform(X_test)
+
+    # X_train, X_test, y_train, y_test = \
+    #     train_test_split(X, y, test_size=0.20, random_state=1)
+
+    from sklearn.decomposition import PCA
+
+    from sklearn.pipeline import Pipeline
+
+    num_planes = range(2,12)
+
+    pca_scores =[]
+    pca_std_dev =[]
+    for num_PCA in num_planes:
+        print ('number of Principal Components = {}'.format(num_PCA))
+        pipe_lr = Pipeline([('scl', StandardScaler()),
+                            ('pca', PCA(n_components=num_PCA)),
+                            ('clf', LogisticRegression(random_state=1))])
+
+        pipe_lr.fit(X_train, y_train)
+        print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
+
+
+
+        kfold = StratifiedKFold(y=y_train,
+                                n_folds=10,
+                                random_state=1)
+
+        scores = []
+        for k, (train, test) in enumerate(kfold):
+            pipe_lr.fit(X_train[train], y_train[train])
+            score = pipe_lr.score(X_train[test], y_train[test])
+            scores.append(score)
+            #print ('train {} samples: {}'.format(len(train), train))
+            #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score))
+
+        print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
+        from sklearn.cross_validation import cross_val_score
+
+        scores = cross_val_score(estimator=pipe_lr,
+                                 X=X_train,
+                                 y=y_train,
+                                 cv=10,
+                                 n_jobs=-1)
+        print('CV accuracy scores: %s' % scores)
+        print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
+        pca_scores.append(np.mean(scores))
+        pca_std_dev.append(np.std(scores))
+
+    plt.plot(num_planes, pca_scores, marker='o')
+    plt.ylabel('Accuracy')
+    plt.xlabel('number of Principal Components')
+    title = 'Accuracy versus number of Principal Components'
+    plt.title(title)
+    plt.tight_layout()
+    ocr_utils.show_figures(plt, title)
+
+    plt.plot(num_planes, pca_std_dev, marker='o')
+    plt.ylabel('Standard Deviation')
+    plt.xlabel('number of Principal Components')
+    title = 'Standard Deviation versus number of Principal Components'
+    plt.title(title)
+    plt.tight_layout()
+    ocr_utils.show_figures(plt, title)
+
+    pca_scores =[]
+    pca_std_dev =[]
+    for num_LDA in num_planes:
+        print ('number of Principal Components = {}'.format(num_LDA))
+        pipe_lr = Pipeline([('scl', StandardScaler()),
+                            ('lda', LDA(n_components=num_LDA)),
+                            ('clf', LogisticRegression(random_state=1))])
+
+        pipe_lr.fit(X_train, y_train)
+        print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
+
+
+        kfold = StratifiedKFold(y=y_train,
+                                n_folds=10,
+                                random_state=1)
+
+        scores = []
+        for k, (train, test) in enumerate(kfold):
+            pipe_lr.fit(X_train[train], y_train[train])
+            score = pipe_lr.score(X_train[test], y_train[test])
+            scores.append(score)
+            #print ('train {} samples: {}'.format(len(train), train))
+            #print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train])[list(charsToTrain)], score))
+
+        print('\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
+
+
+        scores = cross_val_score(estimator=pipe_lr,
+                                 X=X_train,
+                                 y=y_train,
+                                 cv=10,
+                                 n_jobs=-1)
+        print('CV accuracy scores: %s' % scores)
+        print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
+        pca_scores.append(np.mean(scores))
+        pca_std_dev.append(np.std(scores))
+
+    plt.plot(num_planes, pca_scores, marker='o')
+    plt.ylabel('Accuracy')
+    plt.xlabel('number of Linear Discriminants')
+    title = 'Accuracy versus number of Linear Discriminants'
+    plt.title(title)
+    plt.tight_layout()
+    ocr_utils.show_figures(plt, title)
+
+    plt.plot(num_planes, pca_std_dev, marker='o')
+    plt.ylabel('Standard Deviation')
+    plt.xlabel('number of Linear Discriminants')
+    title = 'Standard Deviation versus number of Linear Discriminants'
+    plt.title(title)
+    plt.tight_layout()
+    ocr_utils.show_figures(plt, title)
+
+    print ('\n########################### No Errors ####################################')
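Note that the script still imports StratifiedKFold, train_test_split and cross_val_score from sklearn.cross_validation and LDA from sklearn.lda; both modules were removed in scikit-learn 0.20 (their replacements live in sklearn.model_selection and sklearn.discriminant_analysis), and StratifiedKFold no longer takes y or n_folds in its constructor. A rough sketch of the same k-fold loop under the current API, again with the bundled digits dataset standing in for the ocr_utils E13B characters:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

if __name__ == '__main__':
    X_train, y_train = load_digits(return_X_y=True)   # stand-in for the E13B data
    pipe_lr = Pipeline([('scl', StandardScaler()),
                        ('pca', PCA(n_components=5)),
                        ('clf', LogisticRegression(max_iter=1000, random_state=1))])

    # New-style StratifiedKFold: n_splits goes to the constructor,
    # the data goes to split().
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    scores = []
    for k, (train, test) in enumerate(kfold.split(X_train, y_train)):
        pipe_lr.fit(X_train[train], y_train[train])
        scores.append(pipe_lr.score(X_train[test], y_train[test]))
    print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))

    # cross_val_score keeps the same call shape; only the import path changed.
    scores = cross_val_score(estimator=pipe_lr, X=X_train, y=y_train, cv=10, n_jobs=-1)
    print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))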
