ManjunathGIT
diff --git a/code/ch06/ch06.ipynb b/code/ch06/ch06.ipynb
+73-62
@@ -129,6 +129,17 @@
     "from IPython.display import Image"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -347,8 +358,8 @@
    "source": [
     "import pandas as pd\n",
     "\n",
-    "df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None)\n",
-    "df.head()"
+    "df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases'\n",
+    "                 '/breast-cancer-wisconsin/wdbc.data', header=None)"
    ]
   },
   {
@@ -631,7 +642,7 @@
     "from sklearn.cross_validation import train_test_split\n",
     "\n",
     "X_train, X_test, y_train, y_test = \\\n",
-    "        train_test_split(X, y, test_size=0.20, random_state=1)"
+    "    train_test_split(X, y, test_size=0.20, random_state=1)"
    ]
   },
   {
@@ -671,8 +682,8 @@
     "from sklearn.pipeline import Pipeline\n",
     "\n",
     "pipe_lr = Pipeline([('scl', StandardScaler()),\n",
-    "            ('pca', PCA(n_components=2)),\n",
-    "            ('clf', LogisticRegression(random_state=1))])\n",
+    "                    ('pca', PCA(n_components=2)),\n",
+    "                    ('clf', LogisticRegression(random_state=1))])\n",
     "\n",
     "pipe_lr.fit(X_train, y_train)\n",
     "print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))\n",
@@ -843,7 +854,8 @@
     "    pipe_lr.fit(X_train[train], y_train[train])\n",
     "    score = pipe_lr.score(X_train[test], y_train[test])\n",
     "    scores.append(score)\n",
-    "    print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train]), score))\n",
+    "    print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1,\n",
+    "          np.bincount(y_train[train]), score))\n",
     "    \n",
     "print('\\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))"
    ]
@@ -868,9 +880,9 @@
    "source": [
     "from sklearn.cross_validation import cross_val_score\n",
     "\n",
-    "scores = cross_val_score(estimator=pipe_lr, \n",
-    "                         X=X_train, \n",
-    "                         y=y_train, \n",
+    "scores = cross_val_score(estimator=pipe_lr,\n",
+    "                         X=X_train,\n",
+    "                         y=y_train,\n",
     "                         cv=10,\n",
     "                         n_jobs=1)\n",
     "print('CV accuracy scores: %s' % scores)\n",
@@ -953,43 +965,42 @@
     }
    ],
    "source": [
-    "%matplotlib inline\n",
     "import matplotlib.pyplot as plt\n",
     "from sklearn.learning_curve import learning_curve\n",
     "\n",
     "pipe_lr = Pipeline([('scl', StandardScaler()),\n",
-    "            ('clf', LogisticRegression(penalty='l2', random_state=0))])\n",
+    "                    ('clf', LogisticRegression(penalty='l2', random_state=0))])\n",
     "\n",
     "train_sizes, train_scores, test_scores =\\\n",
-    "                learning_curve(estimator=pipe_lr, \n",
-    "                X=X_train, \n",
-    "                y=y_train, \n",
-    "                train_sizes=np.linspace(0.1, 1.0, 10), \n",
-    "                cv=10,\n",
-    "                n_jobs=1)\n",
+    "                learning_curve(estimator=pipe_lr,\n",
+    "                               X=X_train,\n",
+    "                               y=y_train,\n",
+    "                               train_sizes=np.linspace(0.1, 1.0, 10),\n",
+    "                               cv=10,\n",
+    "                               n_jobs=1)\n",
     "\n",
     "train_mean = np.mean(train_scores, axis=1)\n",
     "train_std = np.std(train_scores, axis=1)\n",
     "test_mean = np.mean(test_scores, axis=1)\n",
     "test_std = np.std(test_scores, axis=1)\n",
     "\n",
-    "plt.plot(train_sizes, train_mean, \n",
-    "         color='blue', marker='o', \n",
+    "plt.plot(train_sizes, train_mean,\n",
+    "         color='blue', marker='o',\n",
     "         markersize=5, label='training accuracy')\n",
     "\n",
-    "plt.fill_between(train_sizes, \n",
+    "plt.fill_between(train_sizes,\n",
     "                 train_mean + train_std,\n",
-    "                 train_mean - train_std, \n",
+    "                 train_mean - train_std,\n",
     "                 alpha=0.15, color='blue')\n",
     "\n",
-    "plt.plot(train_sizes, test_mean, \n",
-    "         color='green', linestyle='--', \n",
-    "         marker='s', markersize=5, \n",
+    "plt.plot(train_sizes, test_mean,\n",
+    "         color='green', linestyle='--',\n",
+    "         marker='s', markersize=5,\n",
     "         label='validation accuracy')\n",
     "\n",
-    "plt.fill_between(train_sizes, \n",
+    "plt.fill_between(train_sizes,\n",
     "                 test_mean + test_std,\n",
-    "                 test_mean - test_std, \n",
+    "                 test_mean - test_std,\n",
     "                 alpha=0.15, color='green')\n",
     "\n",
     "plt.grid()\n",
@@ -1231,10 +1242,10 @@
     }
    ],
    "source": [
-    "gs = GridSearchCV(estimator=pipe_svc, \n",
-    "                            param_grid=param_grid, \n",
-    "                            scoring='accuracy', \n",
-    "                            cv=2)\n",
+    "gs = GridSearchCV(estimator=pipe_svc,\n",
+    "                  param_grid=param_grid,\n",
+    "                  scoring='accuracy',\n",
+    "                  cv=2)\n",
     "\n",
     "# Note: Optionally, you could use cv=2 \n",
     "# in the GridSearchCV above to produce\n",
@@ -1261,10 +1272,10 @@
    ],
    "source": [
     "from sklearn.tree import DecisionTreeClassifier\n",
-    "gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0), \n",
-    "                            param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}], \n",
-    "                            scoring='accuracy', \n",
-    "                            cv=2)\n",
+    "gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0),\n",
+    "                  param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}],\n",
+    "                  scoring='accuracy',\n",
+    "                  cv=2)\n",
     "scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)\n",
     "print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))"
    ]
@@ -1561,23 +1572,23 @@
     }
    ],
    "source": [
-    "from sklearn.metrics import make_scorer, f1_score\n",
+    "from sklearn.metrics import make_scorer\n",
     "\n",
     "scorer = make_scorer(f1_score, pos_label=0)\n",
     "\n",
     "c_gamma_range = [0.01, 0.1, 1.0, 10.0]\n",
     "\n",
-    "param_grid = [{'clf__C': c_gamma_range, \n",
+    "param_grid = [{'clf__C': c_gamma_range,\n",
     "               'clf__kernel': ['linear']},\n",
-    "                 {'clf__C': c_gamma_range, \n",
-    "                  'clf__gamma': c_gamma_range, \n",
-    "                  'clf__kernel': ['rbf'],}]\n",
+    "              {'clf__C': c_gamma_range,\n",
+    "               'clf__gamma': c_gamma_range,\n",
+    "               'clf__kernel': ['rbf']}]\n",
     "\n",
-    "gs = GridSearchCV(estimator=pipe_svc, \n",
-    "                                param_grid=param_grid, \n",
-    "                                scoring=scorer, \n",
-    "                                cv=10,\n",
-    "                                n_jobs=-1)\n",
+    "gs = GridSearchCV(estimator=pipe_svc,\n",
+    "                  param_grid=param_grid,\n",
+    "                  scoring=scorer,\n",
+    "                  cv=10,\n",
+    "                  n_jobs=-1)\n",
     "gs = gs.fit(X_train, y_train)\n",
     "print(gs.best_score_)\n",
     "print(gs.best_params_)"
@@ -1637,37 +1648,37 @@
     "all_tpr = []\n",
     "\n",
     "for i, (train, test) in enumerate(cv):\n",
-    "    probas = pipe_lr.fit(X_train2[train], \n",
+    "    probas = pipe_lr.fit(X_train2[train],\n",
     "                         y_train[train]).predict_proba(X_train2[test])\n",
-    "    \n",
-    "    fpr, tpr, thresholds = roc_curve(y_train[test], \n",
-    "                                     probas[:, 1], \n",
+    "\n",
+    "    fpr, tpr, thresholds = roc_curve(y_train[test],\n",
+    "                                     probas[:, 1],\n",
     "                                     pos_label=1)\n",
     "    mean_tpr += interp(mean_fpr, fpr, tpr)\n",
     "    mean_tpr[0] = 0.0\n",
     "    roc_auc = auc(fpr, tpr)\n",
-    "    plt.plot(fpr, \n",
-    "             tpr, \n",
-    "             lw=1, \n",
-    "             label='ROC fold %d (area = %0.2f)' \n",
-    "                    % (i+1, roc_auc))\n",
+    "    plt.plot(fpr,\n",
+    "             tpr,\n",
+    "             lw=1,\n",
+    "             label='ROC fold %d (area = %0.2f)'\n",
+    "                   % (i+1, roc_auc))\n",
     "\n",
-    "plt.plot([0, 1], \n",
-    "         [0, 1], \n",
-    "         linestyle='--', \n",
-    "         color=(0.6, 0.6, 0.6), \n",
+    "plt.plot([0, 1],\n",
+    "         [0, 1],\n",
+    "         linestyle='--',\n",
+    "         color=(0.6, 0.6, 0.6),\n",
     "         label='random guessing')\n",
     "\n",
     "mean_tpr /= len(cv)\n",
     "mean_tpr[-1] = 1.0\n",
     "mean_auc = auc(mean_fpr, mean_tpr)\n",
     "plt.plot(mean_fpr, mean_tpr, 'k--',\n",
     "         label='mean ROC (area = %0.2f)' % mean_auc, lw=2)\n",
-    "plt.plot([0, 0, 1], \n",
-    "         [0, 1, 1], \n",
-    "         lw=2, \n",
-    "         linestyle=':', \n",
-    "         color='black', \n",
+    "plt.plot([0, 0, 1],\n",
+    "         [0, 1, 1],\n",
+    "         lw=2,\n",
+    "         linestyle=':',\n",
+    "         color='black',\n",
     "         label='perfect performance')\n",
     "\n",
     "plt.xlim([-0.05, 1.05])\n",