dell-datascience
diff --git a/‎Assignment 1.ipynb
+3,210 b/‎Assignment 1.ipynb
+3,210
diff --git a/‎Assignment 2.ipynb
+746 b/‎Assignment 2.ipynb
+746
diff --git a/‎Assignment 3.ipynb
+1,194 b/‎Assignment 3.ipynb
+1,194
diff --git a/‎Assignment 4.ipynb
+329 b/‎Assignment 4.ipynb
+329
diff --git a/‎Classifier Visualization.ipynb
+328 b/‎Classifier Visualization.ipynb
+328
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-machine-learning/resources/bANLa) course resource._\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "# Classifier Visualization Playground\n",
+    "\n",
+    "The purpose of this notebook is to let you visualize various classifiers' decision boundaries.\n",
+    "\n",
+    "The data used in this notebook is based on the [UCI Mushroom Data Set](http://archive.ics.uci.edu/ml/datasets/Mushroom?ref=datanews.io) stored in `mushrooms.csv`. \n",
+    "\n",
+    "In order to better visualize the decision boundaries, we'll perform Principal Component Analysis (PCA) on the data to reduce the dimensionality to 2 dimensions. Dimensionality reduction will be covered in a later module of this course.\n",
+    "\n",
+    "Play around with different models and parameters to see how they affect the classifier's decision boundary and accuracy!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib notebook\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Load the mushroom data and one-hot encode every column.\n",
+    "df = pd.read_csv('readonly/mushrooms.csv')\n",
+    "df2 = pd.get_dummies(df)\n",
+    "\n",
+    "# Work on an 8% subsample. The fixed seed makes Restart & Run All reproduce\n",
+    "# the same points, figures and accuracies (train_test_split below is seeded too).\n",
+    "df3 = df2.sample(frac=0.08, random_state=0)\n",
+    "\n",
+    "# NOTE(review): presumably columns 0 and 1 are the edible/poisonous class dummies,\n",
+    "# so column 1 is the 'poisonous' indicator and the rest are features - verify against the CSV.\n",
+    "X = df3.iloc[:,2:]\n",
+    "y = df3.iloc[:,1]\n",
+    "\n",
+    "\n",
+    "# Project the features onto their first two principal components for plotting.\n",
+    "pca = PCA(n_components=2).fit_transform(X)\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(pca, y, random_state=0)\n",
+    "\n",
+    "\n",
+    "plt.figure(dpi=120)\n",
+    "plt.scatter(pca[y.values==0,0], pca[y.values==0,1], alpha=0.5, label='Edible', s=2)\n",
+    "plt.scatter(pca[y.values==1,0], pca[y.values==1,1], alpha=0.5, label='Poisonous', s=2)\n",
+    "plt.legend()\n",
+    "plt.title('Mushroom Data Set\\nFirst Two Principal Components')\n",
+    "plt.xlabel('PC1')\n",
+    "plt.ylabel('PC2')\n",
+    "plt.gca().set_aspect('equal')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "def plot_mushroom_boundary(X, y, fitted_model):\n",
+    "    \"\"\"Plot a fitted classifier's predictions over a 2-D feature space.\n",
+    "\n",
+    "    Creates one figure with two panels: 'Decision Boundary' shades a mesh by\n",
+    "    fitted_model.predict, and 'Decision Probabilities' shades it by column 1 of\n",
+    "    fitted_model.predict_proba. The points in X are drawn on each panel and the\n",
+    "    title reports fitted_model.score(X, y). If the model has no predict_proba,\n",
+    "    the second panel shows a 'Probabilities Unavailable' message instead.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    X : 2-D array indexed as X[:, 0] and X[:, 1] (the two plotted features).\n",
+    "    y : labels for the rows of X; 0 is drawn as 'Edible', 1 as 'Poisonous'.\n",
+    "    fitted_model : already-fitted estimator providing predict and score, and\n",
+    "        optionally predict_proba.\n",
+    "    \"\"\"\n",
+    "    plt.figure(figsize=(9.8,5), dpi=100)\n",
+    "\n",
+    "    # Everything below up to the loop is identical for both panels, so compute it once.\n",
+    "    labels = np.asarray(y)  # accepts a pandas Series as well as a plain array\n",
+    "    mesh_step_size = 0.01  # step size in the mesh\n",
+    "    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1\n",
+    "    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1\n",
+    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size), np.arange(y_min, y_max, mesh_step_size))\n",
+    "    mesh_points = np.c_[xx.ravel(), yy.ravel()]\n",
+    "    model_name = str(fitted_model).split('(')[0]\n",
+    "    accuracy = np.round(fitted_model.score(X, y), 5)\n",
+    "\n",
+    "    for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):\n",
+    "        plt.subplot(1,2,i+1)\n",
+    "\n",
+    "        if i == 0:\n",
+    "            Z = fitted_model.predict(mesh_points)\n",
+    "        else:\n",
+    "            try:\n",
+    "                Z = fitted_model.predict_proba(mesh_points)[:,1]\n",
+    "            except AttributeError:\n",
+    "                # Models without probability estimates (e.g. SVC with probability=False)\n",
+    "                # raise AttributeError; any other error is a real failure and propagates.\n",
+    "                plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',\n",
+    "                     verticalalignment='center', transform = plt.gca().transAxes, fontsize=12)\n",
+    "                plt.axis('off')\n",
+    "                break\n",
+    "        Z = Z.reshape(xx.shape)\n",
+    "        plt.scatter(X[labels==0,0], X[labels==0,1], alpha=0.4, label='Edible', s=5)\n",
+    "        plt.scatter(X[labels==1,0], X[labels==1,1], alpha=0.4, label='Poisonous', s=5)\n",
+    "        # origin='lower' keeps the mesh rows aligned with increasing y on the axes.\n",
+    "        plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15, \n",
+    "                   extent=(x_min, x_max, y_min, y_max), origin='lower')\n",
+    "        plt.title(plot_type + '\\n' + \n",
+    "                  model_name + ' Test Accuracy: ' + str(accuracy))\n",
+    "        plt.gca().set_aspect('equal')\n",
+    "        \n",
+    "    plt.tight_layout()\n",
+    "    plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true,
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "# Fit on the training split, then plot against the held-out test split.\n",
+    "model = LogisticRegression().fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "\n",
+    "# Fit on the training split, then plot against the held-out test split.\n",
+    "model = KNeighborsClassifier(n_neighbors=20).fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "\n",
+    "# Shallow tree (depth capped at 3): fit on train, plot against test.\n",
+    "model = DecisionTreeClassifier(max_depth=3).fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "\n",
+    "# Tree with default settings (no depth cap given): fit on train, plot against test.\n",
+    "model = DecisionTreeClassifier().fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "# Random forests are stochastic; pin the seed so the boundary and the reported\n",
+    "# accuracy are the same on every run of this cell.\n",
+    "model = RandomForestClassifier(random_state=0)\n",
+    "model.fit(X_train,y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.svm import SVC\n",
+    "\n",
+    "# Linear-kernel SVM: fit on the training split, plot against the test split.\n",
+    "model = SVC(kernel='linear').fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.svm import SVC\n",
+    "\n",
+    "# RBF-kernel SVM with C=1: fit on the training split, plot against the test split.\n",
+    "model = SVC(kernel='rbf', C=1).fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.svm import SVC\n",
+    "\n",
+    "# RBF-kernel SVM with C=10: fit on the training split, plot against the test split.\n",
+    "model = SVC(kernel='rbf', C=10).fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.naive_bayes import GaussianNB\n",
+    "\n",
+    "# Gaussian naive Bayes: fit on the training split, plot against the test split.\n",
+    "model = GaussianNB().fit(X_train, y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, fitted_model=model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.neural_network import MLPClassifier\n",
+    "\n",
+    "# MLP weights are initialised randomly; pin the seed so the boundary and the\n",
+    "# reported accuracy are the same on every run of this cell.\n",
+    "model = MLPClassifier(random_state=0)\n",
+    "model.fit(X_train,y_train)\n",
+    "\n",
+    "plot_mushroom_boundary(X_test, y_test, model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}