# 14_05.py
# Forked from pdeitel/PythonFundamentalsLiveLessons
# 14.5 Case Study: Multiple Linear Regression with the California Housing Dataset
#
# NOTE: the bare expressions in this section (e.g. california.data.shape)
# only display a value when evaluated in an interactive session such as
# IPython; when the file is run as a script they are evaluated and discarded.

# 14.5.1 Loading the Dataset
# Loading the Data
from sklearn.datasets import fetch_california_housing

# The returned object exposes the attributes used throughout this file:
# DESCR, data, target and feature_names.
california = fetch_california_housing()

# Displaying the Dataset’s Description
print(california.DESCR)

california.data.shape
california.target.shape
california.feature_names

# 14.5.2 Exploring the Data with Pandas
import pandas as pd

# Use fully qualified option names. pandas resolves option names by pattern
# and raises OptionError when a pattern matches more than one option; the
# abbreviated names 'precision' and 'max_columns' are ambiguous in pandas
# releases that also define styler.* options with the same suffixes.
pd.set_option('display.precision', 4)
pd.set_option('display.max_columns', 9)
pd.set_option('display.width', None)

# One column per feature, plus the target appended as 'MedHouseValue'.
california_df = pd.DataFrame(california.data,
                             columns=california.feature_names)
california_df['MedHouseValue'] = pd.Series(california.target)

california_df.head()
california_df.describe()
# 14.5.3 Visualizing the Features
# Plot a 10% random sample of the rows (fixed seed, so the same rows are
# selected on every run) rather than the full DataFrame.
sample_df = california_df.sample(frac=0.1, random_state=17)

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(font_scale=2)
sns.set_style('whitegrid')

# One new 16x9 figure per feature: the feature on the x-axis against
# MedHouseValue on the y-axis, with the points also colored by MedHouseValue.
for feature in california.feature_names:
    plt.figure(figsize=(16, 9))
    sns.scatterplot(data=sample_df, x=feature,
                    y='MedHouseValue', hue='MedHouseValue',
                    palette='cool', legend=False)
# 14.5.4 Splitting the Data for Training and Testing
from sklearn.model_selection import train_test_split

# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    california.data, california.target, random_state=11)

# Bare expressions: displayed only in an interactive session.
X_train.shape
X_test.shape

# 14.5.5 Training the Model
from sklearn.linear_model import LinearRegression

linear_regression = LinearRegression()
linear_regression.fit(X=X_train, y=y_train)

# Print each feature name (right-aligned in a 10-character field) next to
# the coefficient at the same position in coef_.
for name, coefficient in zip(california.feature_names,
                             linear_regression.coef_):
    print(f'{name:>10}: {coefficient}')

linear_regression.intercept_
# 14.5.6 Testing the Model
# Predictions for the held-out samples, alongside their known target values.
predicted = linear_regression.predict(X_test)
expected = y_test

# Bare expressions: the first five of each, displayed only in an
# interactive session.
predicted[:5]
expected[:5]

# 14.5.7 Visualizing the Expected vs. Predicted Prices
# Collect both arrays as the two columns of a single DataFrame for plotting.
df = pd.DataFrame({
    'Expected': pd.Series(expected),
    'Predicted': pd.Series(predicted),
})

figure = plt.figure(figsize=(9, 9))
axes = sns.scatterplot(
    data=df,
    x='Expected',
    y='Predicted',
    hue='Predicted',
    palette='cool',
    legend=False,
)

# Give both axes the same range, spanning the smallest and largest values
# found in either array.
start = min(expected.min(), predicted.min())
end = max(expected.max(), predicted.max())
limits = [start, end]
axes.set_xlim(*limits)
axes.set_ylim(*limits)

# Dashed black diagonal from (start, start) to (end, end): points on this
# line have predicted == expected.
line = plt.plot(limits, limits, 'k--')
# 14.5.8 Regression Model Metrics
from sklearn import metrics

# Bare expressions: displayed only in an interactive session.
metrics.r2_score(expected, predicted)
metrics.mean_squared_error(expected, predicted)

# 14.5.9 Choosing the Best Model
from sklearn.linear_model import ElasticNet, Lasso, Ridge

# Display name -> estimator. The LinearRegression entry reuses the existing
# linear_regression object; the others are created with default arguments.
estimators = {
    'LinearRegression': linear_regression,
    'ElasticNet': ElasticNet(),
    'Lasso': Lasso(),
    'Ridge': Ridge(),
}

from sklearn.model_selection import KFold, cross_val_score

# The splitter's configuration does not depend on the estimator, so it is
# created once and shared by every cross_val_score call below.
kfold = KFold(n_splits=10, random_state=11, shuffle=True)

# Score every estimator on the full dataset with 10-fold cross-validation
# and report the mean of its r2 scores.
for estimator_name, estimator_object in estimators.items():
    scores = cross_val_score(estimator=estimator_object,
                             X=california.data, y=california.target,
                             cv=kfold, scoring='r2')
    print(f'{estimator_name:>16}: mean of r2 scores={scores.mean():.3f}')
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
# Pearson Education, Inc. All Rights Reserved. #
# #
# DISCLAIMER: The authors and publisher of this book have used their #
# best efforts in preparing the book. These efforts include the #
# development, research, and testing of the theories and programs #
# to determine their effectiveness. The authors and publisher make #
# no warranty of any kind, expressed or implied, with regard to these #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or #
# consequential damages in connection with, or arising out of, the #
# furnishing, performance, or use of these programs. #
##########################################################################