Skip to content

Commit 54d4008

Browse files
1st commit entire project
1 parent a78f2f8 commit 54d4008

12 files changed

+530029
-0
lines changed

Assignment 1.ipynb

+3,210
Large diffs are not rendered by default.

Assignment 2.ipynb

+746
Large diffs are not rendered by default.

Assignment 3.ipynb

+1,194
Large diffs are not rendered by default.

Assignment 4.ipynb

+329
Large diffs are not rendered by default.

Classifier Visualization.ipynb

+328
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"---\n",
8+
"\n",
9+
"_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-machine-learning/resources/bANLa) course resource._\n",
10+
"\n",
11+
"---"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {
17+
"deletable": true,
18+
"editable": true
19+
},
20+
"source": [
21+
"# Classifier Visualization Playground\n",
22+
"\n",
23+
"The purpose of this notebook is to let you visualize various classsifiers' decision boundaries.\n",
24+
"\n",
25+
"The data used in this notebook is based on the [UCI Mushroom Data Set](http://archive.ics.uci.edu/ml/datasets/Mushroom?ref=datanews.io) stored in `mushrooms.csv`. \n",
26+
"\n",
27+
"In order to better vizualize the decision boundaries, we'll perform Principal Component Analysis (PCA) on the data to reduce the dimensionality to 2 dimensions. Dimensionality reduction will be covered in a later module of this course.\n",
28+
"\n",
29+
"Play around with different models and parameters to see how they affect the classifier's decision boundary and accuracy!"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {
36+
"collapsed": false,
37+
"deletable": true,
38+
"editable": true
39+
},
40+
"outputs": [],
41+
"source": [
42+
"%matplotlib notebook\n",
43+
"\n",
44+
"import pandas as pd\n",
45+
"import numpy as np\n",
46+
"import matplotlib.pyplot as plt\n",
47+
"from sklearn.decomposition import PCA\n",
48+
"from sklearn.model_selection import train_test_split\n",
49+
"\n",
50+
"df = pd.read_csv('readonly/mushrooms.csv')\n",
51+
"df2 = pd.get_dummies(df)\n",
52+
"\n",
53+
"df3 = df2.sample(frac=0.08)\n",
54+
"\n",
55+
"X = df3.iloc[:,2:]\n",
56+
"y = df3.iloc[:,1]\n",
57+
"\n",
58+
"\n",
59+
"pca = PCA(n_components=2).fit_transform(X)\n",
60+
"\n",
61+
"X_train, X_test, y_train, y_test = train_test_split(pca, y, random_state=0)\n",
62+
"\n",
63+
"\n",
64+
"plt.figure(dpi=120)\n",
65+
"plt.scatter(pca[y.values==0,0], pca[y.values==0,1], alpha=0.5, label='Edible', s=2)\n",
66+
"plt.scatter(pca[y.values==1,0], pca[y.values==1,1], alpha=0.5, label='Poisonous', s=2)\n",
67+
"plt.legend()\n",
68+
"plt.title('Mushroom Data Set\\nFirst Two Principal Components')\n",
69+
"plt.xlabel('PC1')\n",
70+
"plt.ylabel('PC2')\n",
71+
"plt.gca().set_aspect('equal')"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {
78+
"collapsed": false
79+
},
80+
"outputs": [],
81+
"source": [
82+
"def plot_mushroom_boundary(X, y, fitted_model):\n",
83+
"\n",
84+
" plt.figure(figsize=(9.8,5), dpi=100)\n",
85+
" \n",
86+
" for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):\n",
87+
" plt.subplot(1,2,i+1)\n",
88+
"\n",
89+
" mesh_step_size = 0.01 # step size in the mesh\n",
90+
" x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1\n",
91+
" y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1\n",
92+
" xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size), np.arange(y_min, y_max, mesh_step_size))\n",
93+
" if i == 0:\n",
94+
" Z = fitted_model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
95+
" else:\n",
96+
" try:\n",
97+
" Z = fitted_model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:,1]\n",
98+
" except:\n",
99+
" plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',\n",
100+
" verticalalignment='center', transform = plt.gca().transAxes, fontsize=12)\n",
101+
" plt.axis('off')\n",
102+
" break\n",
103+
" Z = Z.reshape(xx.shape)\n",
104+
" plt.scatter(X[y.values==0,0], X[y.values==0,1], alpha=0.4, label='Edible', s=5)\n",
105+
" plt.scatter(X[y.values==1,0], X[y.values==1,1], alpha=0.4, label='Posionous', s=5)\n",
106+
" plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15, \n",
107+
" extent=(x_min, x_max, y_min, y_max), origin='lower')\n",
108+
" plt.title(plot_type + '\\n' + \n",
109+
" str(fitted_model).split('(')[0]+ ' Test Accuracy: ' + str(np.round(fitted_model.score(X, y), 5)))\n",
110+
" plt.gca().set_aspect('equal');\n",
111+
" \n",
112+
" plt.tight_layout()\n",
113+
" plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)"
114+
]
115+
},
116+
{
117+
"cell_type": "code",
118+
"execution_count": null,
119+
"metadata": {
120+
"collapsed": false,
121+
"deletable": true,
122+
"editable": true,
123+
"scrolled": false
124+
},
125+
"outputs": [],
126+
"source": [
127+
"from sklearn.linear_model import LogisticRegression\n",
128+
"\n",
129+
"model = LogisticRegression()\n",
130+
"model.fit(X_train,y_train)\n",
131+
"\n",
132+
"plot_mushroom_boundary(X_test, y_test, model)"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": null,
138+
"metadata": {
139+
"collapsed": false,
140+
"deletable": true,
141+
"editable": true
142+
},
143+
"outputs": [],
144+
"source": [
145+
"from sklearn.neighbors import KNeighborsClassifier\n",
146+
"\n",
147+
"model = KNeighborsClassifier(n_neighbors=20)\n",
148+
"model.fit(X_train,y_train)\n",
149+
"\n",
150+
"plot_mushroom_boundary(X_test, y_test, model)"
151+
]
152+
},
153+
{
154+
"cell_type": "code",
155+
"execution_count": null,
156+
"metadata": {
157+
"collapsed": false,
158+
"deletable": true,
159+
"editable": true
160+
},
161+
"outputs": [],
162+
"source": [
163+
"from sklearn.tree import DecisionTreeClassifier\n",
164+
"\n",
165+
"model = DecisionTreeClassifier(max_depth=3)\n",
166+
"model.fit(X_train,y_train)\n",
167+
"\n",
168+
"plot_mushroom_boundary(X_test, y_test, model)"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": null,
174+
"metadata": {
175+
"collapsed": false,
176+
"deletable": true,
177+
"editable": true
178+
},
179+
"outputs": [],
180+
"source": [
181+
"from sklearn.tree import DecisionTreeClassifier\n",
182+
"\n",
183+
"model = DecisionTreeClassifier()\n",
184+
"model.fit(X_train,y_train)\n",
185+
"\n",
186+
"plot_mushroom_boundary(X_test, y_test, model)"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {
193+
"collapsed": false,
194+
"deletable": true,
195+
"editable": true
196+
},
197+
"outputs": [],
198+
"source": [
199+
"from sklearn.ensemble import RandomForestClassifier\n",
200+
"\n",
201+
"model = RandomForestClassifier()\n",
202+
"model.fit(X_train,y_train)\n",
203+
"\n",
204+
"plot_mushroom_boundary(X_test, y_test, model)"
205+
]
206+
},
207+
{
208+
"cell_type": "code",
209+
"execution_count": null,
210+
"metadata": {
211+
"collapsed": false,
212+
"deletable": true,
213+
"editable": true
214+
},
215+
"outputs": [],
216+
"source": [
217+
"from sklearn.svm import SVC\n",
218+
"\n",
219+
"model = SVC(kernel='linear')\n",
220+
"model.fit(X_train,y_train)\n",
221+
"\n",
222+
"plot_mushroom_boundary(X_test, y_test, model)"
223+
]
224+
},
225+
{
226+
"cell_type": "code",
227+
"execution_count": null,
228+
"metadata": {
229+
"collapsed": false,
230+
"deletable": true,
231+
"editable": true
232+
},
233+
"outputs": [],
234+
"source": [
235+
"from sklearn.svm import SVC\n",
236+
"\n",
237+
"model = SVC(kernel='rbf', C=1)\n",
238+
"model.fit(X_train,y_train)\n",
239+
"\n",
240+
"plot_mushroom_boundary(X_test, y_test, model)"
241+
]
242+
},
243+
{
244+
"cell_type": "code",
245+
"execution_count": null,
246+
"metadata": {
247+
"collapsed": false,
248+
"deletable": true,
249+
"editable": true
250+
},
251+
"outputs": [],
252+
"source": [
253+
"from sklearn.svm import SVC\n",
254+
"\n",
255+
"model = SVC(kernel='rbf', C=10)\n",
256+
"model.fit(X_train,y_train)\n",
257+
"\n",
258+
"plot_mushroom_boundary(X_test, y_test, model)"
259+
]
260+
},
261+
{
262+
"cell_type": "code",
263+
"execution_count": null,
264+
"metadata": {
265+
"collapsed": false,
266+
"deletable": true,
267+
"editable": true
268+
},
269+
"outputs": [],
270+
"source": [
271+
"from sklearn.naive_bayes import GaussianNB\n",
272+
"\n",
273+
"model = GaussianNB()\n",
274+
"model.fit(X_train,y_train)\n",
275+
"\n",
276+
"plot_mushroom_boundary(X_test, y_test, model)"
277+
]
278+
},
279+
{
280+
"cell_type": "code",
281+
"execution_count": null,
282+
"metadata": {
283+
"collapsed": false,
284+
"deletable": true,
285+
"editable": true
286+
},
287+
"outputs": [],
288+
"source": [
289+
"from sklearn.neural_network import MLPClassifier\n",
290+
"\n",
291+
"model = MLPClassifier()\n",
292+
"model.fit(X_train,y_train)\n",
293+
"\n",
294+
"plot_mushroom_boundary(X_test, y_test, model)"
295+
]
296+
},
297+
{
298+
"cell_type": "code",
299+
"execution_count": null,
300+
"metadata": {
301+
"collapsed": true
302+
},
303+
"outputs": [],
304+
"source": []
305+
}
306+
],
307+
"metadata": {
308+
"kernelspec": {
309+
"display_name": "Python 3",
310+
"language": "python",
311+
"name": "python3"
312+
},
313+
"language_info": {
314+
"codemirror_mode": {
315+
"name": "ipython",
316+
"version": 3
317+
},
318+
"file_extension": ".py",
319+
"mimetype": "text/x-python",
320+
"name": "python",
321+
"nbconvert_exporter": "python",
322+
"pygments_lexer": "ipython3",
323+
"version": "3.6.2"
324+
}
325+
},
326+
"nbformat": 4,
327+
"nbformat_minor": 2
328+
}

0 commit comments

Comments
 (0)