forked from pdeitel/PythonFundamentalsLiveLessons
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: 14_02-03.py
executable file
·168 lines (113 loc) · 4.81 KB
/
14_02-03.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# This file contains Sections 14.2 and 14.3
# 14.2 Case Study: Classification with k-Nearest Neighbors and the Digits Dataset, Part 1
# 14.2.2 Loading the Dataset
from sklearn.datasets import load_digits

digits = load_digits()  # Bunch object bundling the samples and their labels

# Displaying the Description
print(digits.DESCR)

# Checking the Sample and Target Sizes.
# NOTE(review): these were bare IPython expressions; run as a plain script
# they are silent no-ops, so wrap them in print() to actually show the values.
print(digits.target[::100])   # every 100th target label
print(digits.data.shape)      # (samples, features)
print(digits.target.shape)    # (samples,)

# A Sample Digit Image: the 8x8 pixel-intensity array for sample 13
print(digits.images[13])

# Preparing the Data for Use with Scikit-Learn: the same sample flattened
# into the one-row-per-sample form that estimators expect
print(digits.data[13])
# 14.2.3 Visualizing the Data
import matplotlib.pyplot as plt

# Creating the Diagram: a 4x6 grid of subplots for the first 24 digit images
figure, axes = plt.subplots(nrows=4, ncols=6, figsize=(6, 4))

# Displaying Each Image and Removing the Axes Labels.
# NOTE(review): the original loop body had lost its indentation (a
# SyntaxError when run as a script) and rebound the name `axes` inside the
# loop, shadowing the subplot array; restored the indentation, renamed the
# loop variable to `ax`, and unpacked directly in the for header.
for ax, image, target in zip(axes.ravel(), digits.images, digits.target):
    ax.imshow(image, cmap=plt.cm.gray_r)
    ax.set_xticks([])  # remove x-axis tick marks
    ax.set_yticks([])  # remove y-axis tick marks
    ax.set_title(target)
plt.tight_layout()
# 14.2.4 Splitting the Data for Training and Testing
from sklearn.model_selection import train_test_split

# random_state=11 makes the shuffle reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=11)

# Training and Testing Set Sizes.
# NOTE(review): bare expressions are no-ops in a script; print them instead.
print(X_train.shape)
print(X_test.shape)

# 14.2.5 Creating the Model
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()  # default hyperparameters

# 14.2.6 Training the Model
knn.fit(X=X_train, y=y_train)

# 14.2.7 Predicting Digit Classes
predicted = knn.predict(X=X_test)
expected = y_test
print(predicted[:20])
print(expected[:20])

# (predicted, expected) pairs the model classified incorrectly
wrong = [(p, e) for (p, e) in zip(predicted, expected) if p != e]
print(wrong)
# 14.3 Case Study: Classification with k-Nearest Neighbors and the Digits Dataset, Part 2
# 14.3.1 Metrics for Model Accuracy
# Estimator Method score: prediction accuracy on the held-out test set
print(f'{knn.score(X_test, y_test):.2%}')

# Confusion Matrix: rows are true labels, columns are predicted labels
from sklearn.metrics import confusion_matrix
confusion = confusion_matrix(y_true=expected, y_pred=predicted)
# NOTE(review): bare expression was a no-op in a script; print it instead.
print(confusion)

# Classification Report: per-digit precision, recall, f1-score and support
from sklearn.metrics import classification_report
names = [str(digit) for digit in digits.target_names]
print(classification_report(expected, predicted,
                            target_names=names))

# Visualizing the Confusion Matrix as a heat map
import pandas as pd
confusion_df = pd.DataFrame(confusion, index=range(10),
                            columns=range(10))
import seaborn as sns
axes = sns.heatmap(confusion_df, annot=True,
                   cmap='nipy_spectral_r')
# 14.3.2 K-Fold Cross-Validation
# KFold Class: 10 folds, shuffled reproducibly (random_state=11) first
from sklearn.model_selection import KFold
kfold = KFold(n_splits=10, random_state=11, shuffle=True)

# Using the KFold Object with Function cross_val_score
from sklearn.model_selection import cross_val_score
scores = cross_val_score(estimator=knn, X=digits.data,
                         y=digits.target, cv=kfold)
# NOTE(review): bare expression was a no-op in a script; print the per-fold
# accuracies instead.
print(scores)
print(f'Mean accuracy: {scores.mean():.2%}')
print(f'Accuracy standard deviation: {scores.std():.2%}')
# 14.3.3 Running Multiple Models to Find the Best One
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# Compare three classifiers under identical 10-fold CV splits
estimators = {
    'KNeighborsClassifier': knn,
    'SVC': SVC(gamma='scale'),
    'GaussianNB': GaussianNB()}

# NOTE(review): the loop body had lost its indentation (a SyntaxError when
# run as a script); restored. A fresh KFold with the same random_state is
# created per estimator so each model sees the same splits.
for estimator_name, estimator_object in estimators.items():
    kfold = KFold(n_splits=10, random_state=11, shuffle=True)
    scores = cross_val_score(estimator=estimator_object,
                             X=digits.data, y=digits.target, cv=kfold)
    print(f'{estimator_name:>20}: ' +
          f'mean accuracy={scores.mean():.2%}; ' +
          f'standard deviation={scores.std():.2%}')
# 14.3.4 Hyperparameter Tuning: evaluate odd k values 1-19 with 10-fold CV
# NOTE(review): the loop body had lost its indentation (a SyntaxError when
# run as a script); restored.
for k in range(1, 20, 2):
    kfold = KFold(n_splits=10, random_state=11, shuffle=True)
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(estimator=knn,
                             X=digits.data, y=digits.target, cv=kfold)
    print(f'k={k:<2}; mean accuracy={scores.mean():.2%}; ' +
          f'standard deviation={scores.std():.2%}')
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
# Pearson Education, Inc. All Rights Reserved. #
# #
# DISCLAIMER: The authors and publisher of this book have used their #
# best efforts in preparing the book. These efforts include the #
# development, research, and testing of the theories and programs #
# to determine their effectiveness. The authors and publisher make #
# no warranty of any kind, expressed or implied, with regard to these #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or #
# consequential damages in connection with, or arising out of, the #
# furnishing, performance, or use of these programs. #
##########################################################################