Skip to content

Commit 5eea355

Browse files
committed
Multiprocessing fix
Modification so it will run on Windows without multiprocessing
1 parent 77289e4 commit 5eea355

5 files changed

+295
-289
lines changed

p186_grid_search.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,34 +16,36 @@
1616
from sklearn.grid_search import GridSearchCV
1717
from sklearn.svm import SVC
1818

19-
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0)
19+
if __name__ == '__main__':
2020

21-
pipe_svc = Pipeline([('scl', StandardScaler()),
22-
('clf', SVC(random_state=1))])
21+
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0)
2322

24-
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
23+
pipe_svc = Pipeline([('scl', StandardScaler()),
24+
('clf', SVC(random_state=1))])
2525

26-
param_grid = [{'clf__C': param_range,
27-
'clf__kernel': ['linear']},
28-
{'clf__C': param_range,
29-
'clf__gamma': param_range,
30-
'clf__kernel': ['rbf']}]
26+
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
3127

32-
gs = GridSearchCV(estimator=pipe_svc,
33-
param_grid=param_grid,
34-
scoring='accuracy',
35-
cv=10,
36-
n_jobs=-1)
37-
gs = gs.fit(X_train, y_train)
28+
param_grid = [{'clf__C': param_range,
29+
'clf__kernel': ['linear']},
30+
{'clf__C': param_range,
31+
'clf__gamma': param_range,
32+
'clf__kernel': ['rbf']}]
3833

39-
print('Support Vector Machine Grid Search best score: {}'.format(gs.best_score_))
40-
print('Support Vector Machine Grid Search best params: {}'.format(gs.best_params_))
34+
gs = GridSearchCV(estimator=pipe_svc,
35+
param_grid=param_grid,
36+
scoring='accuracy',
37+
cv=10,
38+
n_jobs=-1)
39+
gs = gs.fit(X_train, y_train)
4140

42-
clf = gs.best_estimator_
43-
clf.fit(X_train, y_train)
44-
print('Support Vector Machine Test accuracy: %.3f' % clf.score(X_test, y_test))
41+
print('Support Vector Machine Grid Search best score: {}'.format(gs.best_score_))
42+
print('Support Vector Machine Grid Search best params: {}'.format(gs.best_params_))
4543

46-
print ('\n########################### No Errors ####################################')
44+
clf = gs.best_estimator_
45+
clf.fit(X_train, y_train)
46+
print('Support Vector Machine Test accuracy: %.3f' % clf.score(X_test, y_test))
47+
48+
print ('\n########################### No Errors ####################################')
4749

4850

4951

p189_nested_cross_validation.py

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -36,47 +36,47 @@
3636
from sklearn.cross_validation import cross_val_score
3737
from sklearn.grid_search import GridSearchCV
3838
from sklearn.svm import SVC
39+
if __name__ == '__main__':
40+
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0)
3941

40-
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0)
4142

43+
pipe_svc = Pipeline([('scl', StandardScaler()),
44+
('clf', SVC(random_state=1))])
4245

43-
pipe_svc = Pipeline([('scl', StandardScaler()),
44-
('clf', SVC(random_state=1))])
46+
c_gamma_range = [0.01, 0.1, 1.0, 10.0]
47+
48+
param_grid = [{'clf__C': c_gamma_range,
49+
'clf__kernel': ['linear']},
50+
{'clf__C': c_gamma_range,
51+
'clf__gamma': c_gamma_range,
52+
'clf__kernel': ['rbf'],}]
4553

46-
c_gamma_range = [0.01, 0.1, 1.0, 10.0]
47-
48-
param_grid = [{'clf__C': c_gamma_range,
49-
'clf__kernel': ['linear']},
50-
{'clf__C': c_gamma_range,
51-
'clf__gamma': c_gamma_range,
52-
'clf__kernel': ['rbf'],}]
54+
gs = GridSearchCV(estimator=pipe_svc,
55+
param_grid=param_grid,
56+
scoring='accuracy',
57+
cv=5,
58+
n_jobs=-1)
5359

54-
gs = GridSearchCV(estimator=pipe_svc,
55-
param_grid=param_grid,
56-
scoring='accuracy',
57-
cv=5,
58-
n_jobs=-1)
5960

61+
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
62+
print('\nSupport Vector Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
6063

61-
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
62-
print('\nSupport Vector Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
64+
gs = gs.fit(X_train, y_train)
65+
print('Support Vector Machine Grid Search best score: {}'.format(gs.best_score_))
66+
print('Support Vector Machine Grid Search best params: {}\n'.format(gs.best_params_))
6367

64-
gs = gs.fit(X_train, y_train)
65-
print('Support Vector Machine Grid Search best score: {}'.format(gs.best_score_))
66-
print('Support Vector Machine Grid Search best params: {}\n'.format(gs.best_params_))
68+
from sklearn.tree import DecisionTreeClassifier
69+
gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0),
70+
param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}],
71+
scoring='accuracy',
72+
cv=5)
6773

68-
from sklearn.tree import DecisionTreeClassifier
69-
gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0),
70-
param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}],
71-
scoring='accuracy',
72-
cv=5)
7374

75+
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
76+
print('Decision Tree Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
7477

75-
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
76-
print('Decision Tree Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
78+
gs = gs.fit(X_train, y_train)
79+
print('Decision Tree Grid Search best score: {}'.format(gs.best_score_))
80+
print('Decision Tree Grid Search best params: {}'.format(gs.best_params_))
7781

78-
gs = gs.fit(X_train, y_train)
79-
print('Decision Tree Grid Search best score: {}'.format(gs.best_score_))
80-
print('Decision Tree Grid Search best params: {}'.format(gs.best_params_))
81-
82-
print ('\n########################### No Errors ####################################')
82+
print ('\n########################### No Errors ####################################')

p193_model_precision_recall.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -30,45 +30,47 @@
3030
from sklearn.grid_search import GridSearchCV
3131
from sklearn.metrics import make_scorer,precision_score, recall_score, f1_score
3232
from sklearn.cross_validation import train_test_split
33-
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0)
3433

35-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)
34+
if __name__ == '__main__':
35+
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0)
3636

37-
pipe_svc = Pipeline([('scl', StandardScaler()),
38-
('clf', SVC(random_state=1))])
37+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)
3938

40-
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
39+
pipe_svc = Pipeline([('scl', StandardScaler()),
40+
('clf', SVC(random_state=1))])
4141

42-
param_grid = [{'clf__C': param_range,
43-
'clf__kernel': ['linear']},
44-
{'clf__C': param_range,
45-
'clf__gamma': param_range,
46-
'clf__kernel': ['rbf']}]
47-
pipe_svc.fit(X_train, y_train)
48-
y_pred = pipe_svc.predict(X_test)
42+
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
4943

50-
pos_label=y_train[0]
51-
print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
52-
print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
53-
print('F1: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
44+
param_grid = [{'clf__C': param_range,
45+
'clf__kernel': ['linear']},
46+
{'clf__C': param_range,
47+
'clf__gamma': param_range,
48+
'clf__kernel': ['rbf']}]
49+
pipe_svc.fit(X_train, y_train)
50+
y_pred = pipe_svc.predict(X_test)
5451

55-
scorer = make_scorer(f1_score, pos_label=pos_label)
52+
pos_label=y_train[0]
53+
print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
54+
print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
55+
print('F1: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, pos_label=pos_label))
5656

57-
c_gamma_range = [0.01, 0.1, 1.0, 10.0]
57+
scorer = make_scorer(f1_score, pos_label=pos_label)
5858

59-
param_grid = [{'clf__C': c_gamma_range,
60-
'clf__kernel': ['linear']},
61-
{'clf__C': c_gamma_range,
62-
'clf__gamma': c_gamma_range,
63-
'clf__kernel': ['rbf'],}]
59+
c_gamma_range = [0.01, 0.1, 1.0, 10.0]
6460

65-
gs = GridSearchCV(estimator=pipe_svc,
66-
param_grid=param_grid,
67-
scoring=scorer,
68-
cv=10,
69-
n_jobs=-1)
70-
gs = gs.fit(X_train, y_train)
71-
print('\nGrid Search f1 scoring best score: {}'.format(gs.best_score_))
72-
print('Grid Search f1 scoring best params: {}'.format(gs.best_params_))
61+
param_grid = [{'clf__C': c_gamma_range,
62+
'clf__kernel': ['linear']},
63+
{'clf__C': c_gamma_range,
64+
'clf__gamma': c_gamma_range,
65+
'clf__kernel': ['rbf'],}]
7366

74-
print ('\n########################### No Errors ####################################')
67+
gs = GridSearchCV(estimator=pipe_svc,
68+
param_grid=param_grid,
69+
scoring=scorer,
70+
cv=10,
71+
n_jobs=-1)
72+
gs = gs.fit(X_train, y_train)
73+
print('\nGrid Search f1 scoring best score: {}'.format(gs.best_score_))
74+
print('Grid Search f1 scoring best params: {}'.format(gs.best_params_))
75+
76+
print ('\n########################### No Errors ####################################')

0 commit comments

Comments
 (0)