Skip to content

Commit 115eeef

Browse files
committed
pep8 and py scripts part 2
1 parent 2ed0749 commit 115eeef

File tree

13 files changed

+2140
-275
lines changed

13 files changed

+2140
-275
lines changed

code/ch06/ch06.ipynb

+73-62
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,17 @@
129129
"from IPython.display import Image"
130130
]
131131
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": null,
135+
"metadata": {
136+
"collapsed": true
137+
},
138+
"outputs": [],
139+
"source": [
140+
"%matplotlib inline"
141+
]
142+
},
132143
{
133144
"cell_type": "markdown",
134145
"metadata": {},
@@ -347,8 +358,8 @@
347358
"source": [
348359
"import pandas as pd\n",
349360
"\n",
350-
"df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None)\n",
351-
"df.head()"
361+
"df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases'\n",
362+
" '/breast-cancer-wisconsin/wdbc.data', header=None)"
352363
]
353364
},
354365
{
@@ -631,7 +642,7 @@
631642
"from sklearn.cross_validation import train_test_split\n",
632643
"\n",
633644
"X_train, X_test, y_train, y_test = \\\n",
634-
" train_test_split(X, y, test_size=0.20, random_state=1)"
645+
" train_test_split(X, y, test_size=0.20, random_state=1)"
635646
]
636647
},
637648
{
@@ -671,8 +682,8 @@
671682
"from sklearn.pipeline import Pipeline\n",
672683
"\n",
673684
"pipe_lr = Pipeline([('scl', StandardScaler()),\n",
674-
" ('pca', PCA(n_components=2)),\n",
675-
" ('clf', LogisticRegression(random_state=1))])\n",
685+
" ('pca', PCA(n_components=2)),\n",
686+
" ('clf', LogisticRegression(random_state=1))])\n",
676687
"\n",
677688
"pipe_lr.fit(X_train, y_train)\n",
678689
"print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))\n",
@@ -843,7 +854,8 @@
843854
" pipe_lr.fit(X_train[train], y_train[train])\n",
844855
" score = pipe_lr.score(X_train[test], y_train[test])\n",
845856
" scores.append(score)\n",
846-
" print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1, np.bincount(y_train[train]), score))\n",
857+
" print('Fold: %s, Class dist.: %s, Acc: %.3f' % (k+1,\n",
858+
" np.bincount(y_train[train]), score))\n",
847859
" \n",
848860
"print('\\nCV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))"
849861
]
@@ -868,9 +880,9 @@
868880
"source": [
869881
"from sklearn.cross_validation import cross_val_score\n",
870882
"\n",
871-
"scores = cross_val_score(estimator=pipe_lr, \n",
872-
" X=X_train, \n",
873-
" y=y_train, \n",
883+
"scores = cross_val_score(estimator=pipe_lr,\n",
884+
" X=X_train,\n",
885+
" y=y_train,\n",
874886
" cv=10,\n",
875887
" n_jobs=1)\n",
876888
"print('CV accuracy scores: %s' % scores)\n",
@@ -953,43 +965,42 @@
953965
}
954966
],
955967
"source": [
956-
"%matplotlib inline\n",
957968
"import matplotlib.pyplot as plt\n",
958969
"from sklearn.learning_curve import learning_curve\n",
959970
"\n",
960971
"pipe_lr = Pipeline([('scl', StandardScaler()),\n",
961-
" ('clf', LogisticRegression(penalty='l2', random_state=0))])\n",
972+
" ('clf', LogisticRegression(penalty='l2', random_state=0))])\n",
962973
"\n",
963974
"train_sizes, train_scores, test_scores =\\\n",
964-
" learning_curve(estimator=pipe_lr, \n",
965-
" X=X_train, \n",
966-
" y=y_train, \n",
967-
" train_sizes=np.linspace(0.1, 1.0, 10), \n",
968-
" cv=10,\n",
969-
" n_jobs=1)\n",
975+
" learning_curve(estimator=pipe_lr,\n",
976+
" X=X_train,\n",
977+
" y=y_train,\n",
978+
" train_sizes=np.linspace(0.1, 1.0, 10),\n",
979+
" cv=10,\n",
980+
" n_jobs=1)\n",
970981
"\n",
971982
"train_mean = np.mean(train_scores, axis=1)\n",
972983
"train_std = np.std(train_scores, axis=1)\n",
973984
"test_mean = np.mean(test_scores, axis=1)\n",
974985
"test_std = np.std(test_scores, axis=1)\n",
975986
"\n",
976-
"plt.plot(train_sizes, train_mean, \n",
977-
" color='blue', marker='o', \n",
987+
"plt.plot(train_sizes, train_mean,\n",
988+
" color='blue', marker='o',\n",
978989
" markersize=5, label='training accuracy')\n",
979990
"\n",
980-
"plt.fill_between(train_sizes, \n",
991+
"plt.fill_between(train_sizes,\n",
981992
" train_mean + train_std,\n",
982-
" train_mean - train_std, \n",
993+
" train_mean - train_std,\n",
983994
" alpha=0.15, color='blue')\n",
984995
"\n",
985-
"plt.plot(train_sizes, test_mean, \n",
986-
" color='green', linestyle='--', \n",
987-
" marker='s', markersize=5, \n",
996+
"plt.plot(train_sizes, test_mean,\n",
997+
" color='green', linestyle='--',\n",
998+
" marker='s', markersize=5,\n",
988999
" label='validation accuracy')\n",
9891000
"\n",
990-
"plt.fill_between(train_sizes, \n",
1001+
"plt.fill_between(train_sizes,\n",
9911002
" test_mean + test_std,\n",
992-
" test_mean - test_std, \n",
1003+
" test_mean - test_std,\n",
9931004
" alpha=0.15, color='green')\n",
9941005
"\n",
9951006
"plt.grid()\n",
@@ -1231,10 +1242,10 @@
12311242
}
12321243
],
12331244
"source": [
1234-
"gs = GridSearchCV(estimator=pipe_svc, \n",
1235-
" param_grid=param_grid, \n",
1236-
" scoring='accuracy', \n",
1237-
" cv=2)\n",
1245+
"gs = GridSearchCV(estimator=pipe_svc,\n",
1246+
" param_grid=param_grid,\n",
1247+
" scoring='accuracy',\n",
1248+
" cv=2)\n",
12381249
"\n",
12391250
"# Note: Optionally, you could use cv=2 \n",
12401251
"# in the GridSearchCV above to produce\n",
@@ -1261,10 +1272,10 @@
12611272
],
12621273
"source": [
12631274
"from sklearn.tree import DecisionTreeClassifier\n",
1264-
"gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0), \n",
1265-
" param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}], \n",
1266-
" scoring='accuracy', \n",
1267-
" cv=2)\n",
1275+
"gs = GridSearchCV(estimator=DecisionTreeClassifier(random_state=0),\n",
1276+
" param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}],\n",
1277+
" scoring='accuracy',\n",
1278+
" cv=2)\n",
12681279
"scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)\n",
12691280
"print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))"
12701281
]
@@ -1561,23 +1572,23 @@
15611572
}
15621573
],
15631574
"source": [
1564-
"from sklearn.metrics import make_scorer, f1_score\n",
1575+
"from sklearn.metrics import make_scorer\n",
15651576
"\n",
15661577
"scorer = make_scorer(f1_score, pos_label=0)\n",
15671578
"\n",
15681579
"c_gamma_range = [0.01, 0.1, 1.0, 10.0]\n",
15691580
"\n",
1570-
"param_grid = [{'clf__C': c_gamma_range, \n",
1581+
"param_grid = [{'clf__C': c_gamma_range,\n",
15711582
" 'clf__kernel': ['linear']},\n",
1572-
" {'clf__C': c_gamma_range, \n",
1573-
" 'clf__gamma': c_gamma_range, \n",
1574-
" 'clf__kernel': ['rbf'],}]\n",
1583+
" {'clf__C': c_gamma_range,\n",
1584+
" 'clf__gamma': c_gamma_range,\n",
1585+
" 'clf__kernel': ['rbf']}]\n",
15751586
"\n",
1576-
"gs = GridSearchCV(estimator=pipe_svc, \n",
1577-
" param_grid=param_grid, \n",
1578-
" scoring=scorer, \n",
1579-
" cv=10,\n",
1580-
" n_jobs=-1)\n",
1587+
"gs = GridSearchCV(estimator=pipe_svc,\n",
1588+
" param_grid=param_grid,\n",
1589+
" scoring=scorer,\n",
1590+
" cv=10,\n",
1591+
" n_jobs=-1)\n",
15811592
"gs = gs.fit(X_train, y_train)\n",
15821593
"print(gs.best_score_)\n",
15831594
"print(gs.best_params_)"
@@ -1637,37 +1648,37 @@
16371648
"all_tpr = []\n",
16381649
"\n",
16391650
"for i, (train, test) in enumerate(cv):\n",
1640-
" probas = pipe_lr.fit(X_train2[train], \n",
1651+
" probas = pipe_lr.fit(X_train2[train],\n",
16411652
" y_train[train]).predict_proba(X_train2[test])\n",
1642-
" \n",
1643-
" fpr, tpr, thresholds = roc_curve(y_train[test], \n",
1644-
" probas[:, 1], \n",
1653+
"\n",
1654+
" fpr, tpr, thresholds = roc_curve(y_train[test],\n",
1655+
" probas[:, 1],\n",
16451656
" pos_label=1)\n",
16461657
" mean_tpr += interp(mean_fpr, fpr, tpr)\n",
16471658
" mean_tpr[0] = 0.0\n",
16481659
" roc_auc = auc(fpr, tpr)\n",
1649-
" plt.plot(fpr, \n",
1650-
" tpr, \n",
1651-
" lw=1, \n",
1652-
" label='ROC fold %d (area = %0.2f)' \n",
1653-
" % (i+1, roc_auc))\n",
1660+
" plt.plot(fpr,\n",
1661+
" tpr,\n",
1662+
" lw=1,\n",
1663+
" label='ROC fold %d (area = %0.2f)'\n",
1664+
" % (i+1, roc_auc))\n",
16541665
"\n",
1655-
"plt.plot([0, 1], \n",
1656-
" [0, 1], \n",
1657-
" linestyle='--', \n",
1658-
" color=(0.6, 0.6, 0.6), \n",
1666+
"plt.plot([0, 1],\n",
1667+
" [0, 1],\n",
1668+
" linestyle='--',\n",
1669+
" color=(0.6, 0.6, 0.6),\n",
16591670
" label='random guessing')\n",
16601671
"\n",
16611672
"mean_tpr /= len(cv)\n",
16621673
"mean_tpr[-1] = 1.0\n",
16631674
"mean_auc = auc(mean_fpr, mean_tpr)\n",
16641675
"plt.plot(mean_fpr, mean_tpr, 'k--',\n",
16651676
" label='mean ROC (area = %0.2f)' % mean_auc, lw=2)\n",
1666-
"plt.plot([0, 0, 1], \n",
1667-
" [0, 1, 1], \n",
1668-
" lw=2, \n",
1669-
" linestyle=':', \n",
1670-
" color='black', \n",
1677+
"plt.plot([0, 0, 1],\n",
1678+
" [0, 1, 1],\n",
1679+
" lw=2,\n",
1680+
" linestyle=':',\n",
1681+
" color='black',\n",
16711682
" label='perfect performance')\n",
16721683
"\n",
16731684
"plt.xlim([-0.05, 1.05])\n",

0 commit comments

Comments
 (0)