minor fixes to 04

amueller · amueller · commit 8b0ae2d85c2f · 2017-06-02T20:21:11.000+02:00
diff --git a/04-representing-data-feature-engineering.ipynb b/04-representing-data-feature-engineering.ipynb
@@ -137,8 +137,9 @@
     "import os\n",
     "# The file has no headers naming the columns, so we pass header=None\n",
     "# and provide the column names explicitly in \"names\"\n",
+    "adult_path = os.path.join(mglearn.datasets.DATA_PATH, \"adult.data\")\n",
     "data = pd.read_csv(\n",
-    "    os.path.join(mglearn.datasets.DATA_PATH, \"adult.data\"), header=None, index_col=False,\n",
+    "    adult_path, header=None, index_col=False,\n",
     "    names=['age', 'workclass', 'fnlwgt', 'education',  'education-num',\n",
     "           'marital-status', 'occupation', 'relationship', 'race', 'gender',\n",
     "           'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',\n",
@@ -1347,7 +1348,7 @@
     }
    ],
    "source": [
-    "plt.hist(np.log(X_train_log[:, 0] + 1), bins=25, color='gray')\n",
+    "plt.hist(X_train_log[:, 0], bins=25, color='gray')\n",
     "plt.ylabel(\"Number of appearances\")\n",
     "plt.xlabel(\"Value\")"
    ]
@@ -1787,8 +1788,8 @@
    "source": [
     "# extract the target values (number of rentals)\n",
     "y = citibike.values\n",
-    "# convert the time to posixtime using \"%s\"\n",
-    "X = citibike.index.strftime(\"%s\").astype(\"int\").reshape(-1, 1)"
+    "# convert nanoseconds since the epoch to POSIX time (seconds) by dividing by 10**9\n",
+    "X = citibike.index.astype(\"int64\").values.reshape(-1, 1) // 10**9"
    ]
   },
   {