Skip to content

Commit 02ccf80

Browse files
committed
add notebooks
1 parent 30de746 commit 02ccf80

33 files changed

+16606
-0
lines changed

01-introduction.ipynb

+929
Large diffs are not rendered by default.

02-supervised-learning.ipynb

+4,294
Large diffs are not rendered by default.

03-unsupervised-learning.ipynb

+3,228
Large diffs are not rendered by default.

04-scikit-learn-API-summary.ipynb

+225
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": true,
8+
"hide_input": false
9+
},
10+
"outputs": [],
11+
"source": [
12+
"%matplotlib inline\n",
13+
"from preamble import *"
14+
]
15+
},
16+
{
17+
"cell_type": "markdown",
18+
"metadata": {},
19+
"source": [
20+
"# Summary of scikit-learn methods and usage\n",
21+
"## The Estimator Interface"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 2,
27+
"metadata": {
28+
"collapsed": true
29+
},
30+
"outputs": [],
31+
"source": [
32+
"from sklearn.linear_model import LogisticRegression\n",
33+
"logreg = LogisticRegression()"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"metadata": {},
39+
"source": [
40+
"## Calling `fit` resets a model"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 3,
46+
"metadata": {
47+
"collapsed": false
48+
},
49+
"outputs": [
50+
{
51+
"data": {
52+
"text/plain": [
53+
"array([ True, True, True, True, True, True, True, True, True,\n",
54+
" True, True, True, True, True, True, True, True, True,\n",
55+
" True, True, True, True, True, True, True], dtype=bool)"
56+
]
57+
},
58+
"execution_count": 3,
59+
"metadata": {},
60+
"output_type": "execute_result"
61+
}
62+
],
63+
"source": [
64+
"# import the dataset helpers and the train/test splitting utility\n",
65+
"from sklearn.datasets import make_blobs, load_iris\n",
66+
"from sklearn.model_selection import train_test_split\n",
67+
"\n",
68+
"# load iris\n",
69+
"iris = load_iris()\n",
70+
"\n",
71+
"# create some blobs\n",
72+
"X, y = make_blobs(random_state=0, centers=4)\n",
73+
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n",
74+
"\n",
75+
"# build a model on the iris dataset\n",
76+
"logreg = LogisticRegression()\n",
77+
"logreg.fit(iris.data, iris.target)\n",
78+
"# fit the model again on the blob dataset\n",
79+
"logreg.fit(X_train, y_train)\n",
80+
"# the outcome is the same as training a \"fresh\" model:\n",
81+
"new_logreg = LogisticRegression()\n",
82+
"new_logreg.fit(X_train, y_train)\n",
83+
"\n",
84+
"# predictions made by the two models are the same\n",
85+
"pred_new_logreg = new_logreg.predict(X_test)\n",
86+
"pred_logreg = logreg.predict(X_test)\n",
87+
"\n",
88+
"pred_logreg == pred_new_logreg"
89+
]
90+
},
91+
{
92+
"cell_type": "markdown",
93+
"metadata": {},
94+
"source": [
95+
"## Method chaining"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": 4,
101+
"metadata": {
102+
"collapsed": false
103+
},
104+
"outputs": [],
105+
"source": [
106+
"# instantiate model and fit it in one line\n",
107+
"logreg = LogisticRegression().fit(X_train, y_train)"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": 5,
113+
"metadata": {
114+
"collapsed": true
115+
},
116+
"outputs": [],
117+
"source": [
118+
"logreg = LogisticRegression()\n",
119+
"y_pred = logreg.fit(X_train, y_train).predict(X_test)"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": 6,
125+
"metadata": {
126+
"collapsed": true
127+
},
128+
"outputs": [],
129+
"source": [
130+
"y_pred = LogisticRegression().fit(X_train, y_train).predict(X_test)"
131+
]
132+
},
133+
{
134+
"cell_type": "markdown",
135+
"metadata": {},
136+
"source": [
137+
"## Shortcuts and efficient alternatives"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": 7,
143+
"metadata": {
144+
"collapsed": false
145+
},
146+
"outputs": [],
147+
"source": [
148+
"from sklearn.decomposition import PCA\n",
149+
"pca = PCA()\n",
150+
"# calling fit and transform in sequence (using method chaining)\n",
151+
"X_pca = pca.fit(X).transform(X)\n",
152+
"# same result, but computed more efficiently in a single call\n",
153+
"X_pca_2 = pca.fit_transform(X)"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"metadata": {},
159+
"source": [
160+
"## Important Attributes"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": 8,
166+
"metadata": {
167+
"collapsed": false
168+
},
169+
"outputs": [
170+
{
171+
"name": "stdout",
172+
"output_type": "stream",
173+
"text": [
174+
"unique entries of iris.target: [0 1 2]\n",
175+
"classes using iris.target: [0 1 2]\n",
176+
"unique entries of named_target: ['setosa' 'versicolor' 'virginica']\n",
177+
"classes using named_target: ['setosa' 'versicolor' 'virginica']\n"
178+
]
179+
}
180+
],
181+
"source": [
182+
"import numpy as np\n",
183+
"logreg = LogisticRegression()\n",
184+
"# fit model using the original integer-encoded targets\n",
185+
"logreg.fit(iris.data, iris.target)\n",
186+
"print(\"unique entries of iris.target: %s\" % np.unique(iris.target))\n",
187+
"print(\"classes using iris.target: %s\" % logreg.classes_)\n",
188+
"\n",
189+
"# represent each target by its class name\n",
190+
"named_target = iris.target_names[iris.target]\n",
191+
"logreg.fit(iris.data, named_target)\n",
192+
"print(\"unique entries of named_target: %s\" % np.unique(named_target))\n",
193+
"print(\"classes using named_target: %s\" % logreg.classes_)"
194+
]
195+
},
196+
{
197+
"cell_type": "markdown",
198+
"metadata": {},
199+
"source": [
200+
"## Summary and outlook"
201+
]
202+
}
203+
],
204+
"metadata": {
205+
"kernelspec": {
206+
"display_name": "Python 3",
207+
"language": "python",
208+
"name": "python3"
209+
},
210+
"language_info": {
211+
"codemirror_mode": {
212+
"name": "ipython",
213+
"version": 3
214+
},
215+
"file_extension": ".py",
216+
"mimetype": "text/x-python",
217+
"name": "python",
218+
"nbconvert_exporter": "python",
219+
"pygments_lexer": "ipython3",
220+
"version": "3.5.1"
221+
}
222+
},
223+
"nbformat": 4,
224+
"nbformat_minor": 0
225+
}

0 commit comments

Comments
 (0)