# 14.5 Case Study: Multiple Linear Regression with the California Housing Dataset
## 14.5.1 Loading the Dataset
### Loading the Data
**We added `%matplotlib inline` to enable Matplotlib in this notebook.**

In [None]:
%matplotlib inline
from sklearn.datasets import fetch_california_housing

In [None]:
# Load the California Housing dataset; the cells below read its
# DESCR, data, target and feature_names attributes.
california = fetch_california_housing()

### Displaying the Dataset’s Description

In [None]:
# Show the description text bundled with the dataset.
print(california.DESCR)

In [None]:
# Dimensions of the array holding the feature values.
california.data.shape

In [None]:
# Dimensions of the target array (stored later as 'MedHouseValue').
california.target.shape

In [None]:
# Feature names; used below as the column labels for california.data.
california.feature_names

## 14.5.2 Exploring the Data with Pandas

In [None]:
import pandas as pd

In [None]:
# Show floating-point values with 4 digits after the decimal point.
# The fully qualified key is required: on pandas >= 1.4 the short name
# 'precision' also matches 'styler.format.precision', so set_option
# raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.precision', 4)

In [None]:
# Allow up to 9 columns to be displayed (the 8 features plus the target
# column added below). The fully qualified key is required: on
# pandas >= 1.4 the short name 'max_columns' also matches
# 'styler.render.max_columns', so set_option raises OptionError.
pd.set_option('display.max_columns', 9)

In [None]:
# Clear the fixed display width (None) via the attribute-style option API.
pd.options.display.width = None

In [None]:
# Wrap the raw feature array in a DataFrame labelled with the feature names.
california_df = pd.DataFrame(
    data=california.data,
    columns=california.feature_names,
)
 

In [None]:
# Append the target values as a new column; rows line up positionally
# with the feature rows loaded from the same dataset.
california_df['MedHouseValue'] = california.target

In [None]:
# Preview the first rows of the combined features + target frame.
california_df.head()

In [None]:
# Summary statistics for the DataFrame's columns.
california_df.describe()

## 14.5.3 Visualizing the Features 

In [None]:
# Random 10% sample of the rows (seeded with random_state=17);
# this is the data plotted by the scatterplots below.
sample_df = california_df.sample(frac=0.1, random_state=17)

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

In [None]:
# Apply Seaborn's plot settings with font elements scaled by 2.
sns.set(font_scale=2)

In [None]:
# Use the 'whitegrid' style for the plots that follow.
sns.set_style('whitegrid') 

In [None]:
# Draw one 16x9 figure per feature: the feature on the x-axis against
# 'MedHouseValue' on the y-axis, with points colored by 'MedHouseValue'.
for feature in california.feature_names:
    plt.figure(figsize=(16, 9))
    sns.scatterplot(
        data=sample_df,
        x=feature,
        y='MedHouseValue',
        hue='MedHouseValue',
        palette='cool',
        legend=False,
    )
 

## 14.5.4 Splitting the Data for Training and Testing 

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split features and targets into training and testing sets using the
# default split sizes; random_state=11 seeds the split.
X_train, X_test, y_train, y_test = train_test_split(
    california.data,
    california.target,
    random_state=11,
)

In [None]:
# Dimensions of the training features.
X_train.shape

In [None]:
# Dimensions of the testing features.
X_test.shape

## 14.5.5 Training the Model 

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create the estimator with its default arguments.
linear_regression = LinearRegression()

In [None]:
# Train on the training split only; the test split is held back for evaluation.
linear_regression.fit(X=X_train, y=y_train)

In [None]:
# Print each feature name (right-aligned in 10 columns) next to the
# coefficient the fitted model assigned to it.
for name, coefficient in zip(california.feature_names,
                             linear_regression.coef_):
    print(f'{name:>10}: {coefficient}')

In [None]:
# Intercept term of the fitted model.
linear_regression.intercept_

## 14.5.6 Testing the Model 

In [None]:
# Predict target values for the held-out test features.
predicted = linear_regression.predict(X_test)

In [None]:
# The known test targets serve as the expected values.
expected = y_test

In [None]:
# First five predictions.
predicted[:5]

In [None]:
# First five expected values, for comparison with the predictions.
expected[:5]

## 14.5.7 Visualizing the Expected vs. Predicted Prices 

In [None]:
# Start with an empty frame; the next cells add its two columns.
df = pd.DataFrame()

In [None]:
# Column of expected (actual test target) values.
df['Expected'] = pd.Series(expected)

In [None]:
# Column of the model's predictions for the same test samples.
df['Predicted'] = pd.Series(predicted)

In [None]:
# Square 9x9 figure for the expected-vs-predicted scatterplot.
figure = plt.figure(figsize=(9, 9))

axes = sns.scatterplot(
    data=df, x='Expected', y='Predicted',
    hue='Predicted', palette='cool', legend=False)

# Smallest and largest value found in either array, so that both axes
# can share one range.
start = min(expected.min(), predicted.min())
end = max(expected.max(), predicted.max())

# Apply the same limits to the x- and y-axes.
axes.set(xlim=(start, end), ylim=(start, end))

# Dashed black diagonal from (start, start) to (end, end): points on it
# have Predicted equal to Expected.
line = plt.plot([start, end], [start, end], 'k--')

In [None]:
# This placeholder cell (and the five after it) was added because we had to
# combine this section's snippets 37-43 into one cell for the visualization
# to work in Jupyter, and we want the subsequent snippet numbers to match the book

In [None]:
# Placeholder cell 

In [None]:
# Placeholder cell 

In [None]:
# Placeholder cell 

In [None]:
# Placeholder cell 

In [None]:
# Placeholder cell 

## 14.5.8 Regression Model Metrics 
 

In [None]:
from sklearn import metrics

In [None]:
# R2 score of the test-set predictions against the expected values.
metrics.r2_score(expected, predicted)

In [None]:
# Mean squared error between the expected and predicted values.
metrics.mean_squared_error(expected, predicted)

## 14.5.9 Choosing the Best Model

In [None]:
from sklearn.linear_model import ElasticNet, Lasso, Ridge

In [None]:
# Display name -> estimator to compare. The existing linear_regression
# object is reused; the other three are created with default arguments.
estimators = dict(
    LinearRegression=linear_regression,
    ElasticNet=ElasticNet(),
    Lasso=Lasso(),
    Ridge=Ridge(),
)

In [None]:
from sklearn.model_selection import KFold, cross_val_score

In [None]:
# Score every estimator with 10-fold cross-validation over the full
# dataset and report the mean of its r2 scores.
for estimator_name, estimator_object in estimators.items():
    # A new KFold with the same seed (11) is built for each estimator.
    kfold = KFold(n_splits=10, shuffle=True, random_state=11)
    scores = cross_val_score(
        estimator=estimator_object,
        X=california.data,
        y=california.target,
        cv=kfold,
        scoring='r2',
    )
    print(f'{estimator_name:>16}: mean of r2 scores={scores.mean():.3f}')

# More Info 
* See **video** Lesson 14 in [**Python Fundamentals LiveLessons** on Safari Online Learning](https://learning.oreilly.com/videos/python-fundamentals/9780135917411)
* See **book** Chapter 14 in [**Python for Programmers** on Safari Online Learning](https://learning.oreilly.com/library/view/python-for-programmers/9780135231364/), or see **book** Chapter 15 in **Intro to Python for Computer Science and Data Science**
* Interested in a print book? Check out:

| Python for Programmers | Intro to Python for Computer<br>Science and Data Science
| :------ | :------
| <a href="https://amzn.to/2VvdnxE"><img alt="Python for Programmers cover" src="../images/PyFPCover.png" width="150" border="1"/></a> | <a href="https://amzn.to/2LiDCmt"><img alt="Intro to Python for Computer Science and Data Science: Learning to Program with AI, Big Data and the Cloud" src="../images/IntroToPythonCover.png" width="159" border="1"></a>

>Please **do not** purchase both books—our professional book **_Python for Programmers_** is a subset of our college textbook **_Intro to Python for Computer Science and Data Science_**.

In [None]:
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
# Pearson Education, Inc. All Rights Reserved. #
# #
# DISCLAIMER: The authors and publisher of this book have used their #
# best efforts in preparing the book. These efforts include the #
# development, research, and testing of the theories and programs #
# to determine their effectiveness. The authors and publisher make #
# no warranty of any kind, expressed or implied, with regard to these #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or #
# consequential damages in connection with, or arising out of, the #
# furnishing, performance, or use of these programs. #
##########################################################################
