Skip to content

Commit b1a9bf9

Browse files
committed
Adjust so it works under Windows
Adjust for Windows
1 parent 5eea355 commit b1a9bf9

File tree

3 files changed

+132
-126
lines changed

3 files changed

+132
-126
lines changed

p124_random_forest.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,33 @@
2222
import ocr_utils
2323
from sklearn.preprocessing import StandardScaler
2424

25-
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=(9,17), test_size=0.3, nChars=300, random_state=0)
26-
27-
28-
sc = StandardScaler()
29-
sc.fit(X_train)
30-
X_train_std = sc.transform(X_train)
31-
X_test_std = sc.transform(X_test)
32-
X_combined_std = np.vstack((X_train_std, X_test_std))
33-
y_combined = np.hstack((y_train, y_test))
34-
X_combined = np.vstack((X_train, X_test))
35-
y_combined = np.hstack((y_train, y_test))
36-
37-
from sklearn.ensemble import RandomForestClassifier
38-
39-
forest = RandomForestClassifier(criterion='entropy',
40-
n_estimators=10,
41-
random_state=1,
42-
n_jobs=2)
43-
forest.fit(X_train, y_train)
44-
45-
ocr_utils.plot_decision_regions(X=X_combined,
46-
y=y_combined,
47-
classifier=forest,
48-
labels=labels,
49-
test_idx=range(len(X_test_std),len(X_combined_std)),
50-
title='random_forest')
51-
52-
print ('\n########################### No Errors ####################################')
25+
if __name__ == '__main__':
26+
27+
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=(9,17), test_size=0.3, nChars=300, random_state=0)
28+
29+
30+
sc = StandardScaler()
31+
sc.fit(X_train)
32+
X_train_std = sc.transform(X_train)
33+
X_test_std = sc.transform(X_test)
34+
X_combined_std = np.vstack((X_train_std, X_test_std))
35+
y_combined = np.hstack((y_train, y_test))
36+
X_combined = np.vstack((X_train, X_test))
37+
y_combined = np.hstack((y_train, y_test))
38+
39+
from sklearn.ensemble import RandomForestClassifier
40+
41+
forest = RandomForestClassifier(criterion='entropy',
42+
n_estimators=10,
43+
random_state=1,
44+
n_jobs=2)
45+
forest.fit(X_train, y_train)
46+
47+
ocr_utils.plot_decision_regions(X=X_combined,
48+
y=y_combined,
49+
classifier=forest,
50+
labels=labels,
51+
test_idx=range(len(X_test_std),len(X_combined_std)),
52+
title='random_forest')
53+
54+
print ('\n########################### No Errors ####################################')

p131_principal_component_analysis.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@
3939
output_feature_list=output_feature_list,
4040
test_size=.2,
4141
random_state=0)
42-
43-
y_train = ds.train.features[0]
44-
X_train_image = ds.train.features[1]
45-
X_train = ds.train.features[2]
42+
windows_limit = 5000 # uses too much memory for my 32 bit windows computer so limit size of sample
43+
y_train = ds.train.features[0][:windows_limit]
44+
X_train_image = ds.train.features[1][:windows_limit]
45+
X_train = ds.train.features[2][:windows_limit]
4646

4747
y_test = ds.test.features[0]
4848
X_test_image = ds.test.features[1]

p181_learning_curves.py

Lines changed: 98 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -27,97 +27,101 @@
2727
from sklearn.linear_model import LogisticRegression
2828
from sklearn.pipeline import Pipeline
2929

30-
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0)
31-
32-
pipe_lr = Pipeline([('scl', StandardScaler()),
33-
('clf', LogisticRegression(penalty='l2', random_state=0))])
34-
35-
train_sizes, train_scores, test_scores =\
36-
learning_curve(estimator=pipe_lr,
37-
X=X_train,
38-
y=y_train,
39-
train_sizes=np.linspace(0.1, 1.0, 10),
40-
cv=10,
41-
n_jobs=8)
42-
43-
train_mean = np.mean(train_scores, axis=1)
44-
train_std = np.std(train_scores, axis=1)
45-
test_mean = np.mean(test_scores, axis=1)
46-
test_std = np.std(test_scores, axis=1)
47-
48-
plt.plot(train_sizes, train_mean,
49-
color='blue', marker='o',
50-
markersize=5, label='training accuracy')
51-
52-
plt.fill_between(train_sizes,
53-
train_mean + train_std,
54-
train_mean - train_std,
55-
alpha=0.15, color='blue')
56-
57-
plt.plot(train_sizes, test_mean,
58-
color='green', linestyle='--',
59-
marker='s', markersize=5,
60-
label='validation accuracy')
61-
62-
plt.fill_between(train_sizes,
63-
test_mean + test_std,
64-
test_mean - test_std,
65-
alpha=0.15, color='green')
66-
67-
plt.grid()
68-
plt.xlabel('Number of training samples')
69-
plt.ylabel('Accuracy')
70-
plt.legend(loc='lower right')
71-
plt.ylim([0.8, 1.0])
72-
title='learning_curve'
73-
plt.title(title)
74-
plt.tight_layout()
75-
ocr_utils.show_figures(plt,title)
76-
77-
from sklearn.learning_curve import validation_curve
78-
79-
param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
80-
train_scores, test_scores = validation_curve(
81-
estimator=pipe_lr,
82-
X=X_train,
83-
y=y_train,
84-
param_name='clf__C',
85-
param_range=param_range,
86-
cv=10,
87-
n_jobs=8)
88-
89-
train_mean = np.mean(train_scores, axis=1)
90-
train_std = np.std(train_scores, axis=1)
91-
test_mean = np.mean(test_scores, axis=1)
92-
test_std = np.std(test_scores, axis=1)
93-
94-
plt.plot(param_range, train_mean,
95-
color='blue', marker='o',
96-
markersize=5, label='training accuracy')
97-
98-
plt.fill_between(param_range, train_mean + train_std,
99-
train_mean - train_std, alpha=0.15,
100-
color='blue')
101-
102-
plt.plot(param_range, test_mean,
103-
color='green', linestyle='--',
104-
marker='s', markersize=5,
105-
label='validation accuracy')
106-
107-
plt.fill_between(param_range,
108-
test_mean + test_std,
109-
test_mean - test_std,
110-
alpha=0.15, color='green')
111-
112-
plt.grid()
113-
plt.xscale('log')
114-
plt.legend(loc='lower right')
115-
plt.xlabel('Parameter C')
116-
plt.ylabel('Accuracy')
117-
plt.ylim([0.8, 1.0])
118-
title='validation_curve'
119-
plt.title(title)
120-
plt.tight_layout()
121-
ocr_utils.show_figures(plt,title)
122-
123-
print ('\n########################### No Errors ####################################')
30+
if __name__ == '__main__':
31+
32+
33+
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0)
34+
35+
pipe_lr = Pipeline([('scl', StandardScaler()),
36+
('clf', LogisticRegression(penalty='l2', random_state=0))])
37+
38+
train_sizes, train_scores, test_scores =\
39+
learning_curve(estimator=pipe_lr,
40+
X=X_train,
41+
y=y_train,
42+
train_sizes=np.linspace(0.1, 1.0, 10),
43+
cv=10,
44+
n_jobs=8)
45+
46+
train_mean = np.mean(train_scores, axis=1)
47+
train_std = np.std(train_scores, axis=1)
48+
test_mean = np.mean(test_scores, axis=1)
49+
test_std = np.std(test_scores, axis=1)
50+
51+
plt.plot(train_sizes, train_mean,
52+
color='blue', marker='o',
53+
markersize=5, label='training accuracy')
54+
55+
plt.fill_between(train_sizes,
56+
train_mean + train_std,
57+
train_mean - train_std,
58+
alpha=0.15, color='blue')
59+
60+
plt.plot(train_sizes, test_mean,
61+
color='green', linestyle='--',
62+
marker='s', markersize=5,
63+
label='validation accuracy')
64+
65+
plt.fill_between(train_sizes,
66+
test_mean + test_std,
67+
test_mean - test_std,
68+
alpha=0.15, color='green')
69+
70+
plt.grid()
71+
plt.xlabel('Number of training samples')
72+
plt.ylabel('Accuracy')
73+
plt.legend(loc='lower right')
74+
plt.ylim([0.8, 1.0])
75+
title='learning_curve'
76+
plt.title(title)
77+
plt.tight_layout()
78+
ocr_utils.show_figures(plt,title)
79+
80+
from sklearn.learning_curve import validation_curve
81+
82+
param_range = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
83+
train_scores, test_scores = validation_curve(
84+
estimator=pipe_lr,
85+
X=X_train,
86+
y=y_train,
87+
param_name='clf__C',
88+
param_range=param_range,
89+
cv=10,
90+
n_jobs=8)
91+
92+
train_mean = np.mean(train_scores, axis=1)
93+
train_std = np.std(train_scores, axis=1)
94+
test_mean = np.mean(test_scores, axis=1)
95+
test_std = np.std(test_scores, axis=1)
96+
97+
plt.plot(param_range, train_mean,
98+
color='blue', marker='o',
99+
markersize=5, label='training accuracy')
100+
101+
plt.fill_between(param_range, train_mean + train_std,
102+
train_mean - train_std, alpha=0.15,
103+
color='blue')
104+
105+
plt.plot(param_range, test_mean,
106+
color='green', linestyle='--',
107+
marker='s', markersize=5,
108+
label='validation accuracy')
109+
110+
plt.fill_between(param_range,
111+
test_mean + test_std,
112+
test_mean - test_std,
113+
alpha=0.15, color='green')
114+
115+
plt.grid()
116+
plt.xscale('log')
117+
plt.legend(loc='lower right')
118+
plt.xlabel('Parameter C')
119+
plt.ylabel('Accuracy')
120+
plt.ylim([0.8, 1.0])
121+
title='validation_curve'
122+
plt.title(title)
123+
plt.tight_layout()
124+
ocr_utils.show_figures(plt,title)
125+
126+
print ('\n########################### No Errors ####################################')
127+

0 commit comments

Comments (0)