|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "metadata": {}, |
| 6 | + "source": [ |
| 7 | + "Import this app's dependencies." |
| 8 | + ] |
| 9 | + }, |
| 10 | + { |
| 11 | + "cell_type": "code", |
| 12 | + "execution_count": null, |
| 13 | + "metadata": {}, |
| 14 | + "outputs": [], |
| 15 | + "source": [ |
| 16 | + "from pyspark import SparkContext\n", |
| 17 | + "from pyspark.streaming import StreamingContext\n", |
| 18 | + "from pyspark.sql import Row, SparkSession\n", |
| 19 | + "from IPython import display\n", |
| 20 | + "import matplotlib.pyplot as plt\n", |
| 21 | + "import seaborn as sns\n", |
| 22 | + "%matplotlib inline " |
| 23 | + ] |
| 24 | + }, |
| 25 | + { |
| 26 | + "cell_type": "markdown", |
| 27 | + "metadata": {}, |
| 28 | + "source": [ |
| 29 | + "From the PySpark Streaming Programming Guide at https://spark.apache.org/docs/latest/streaming-programming-guide.html#dataframe-and-sql-operations. This is the recommended way for each cluster node to get the SparkSession." |
| 30 | + ] |
| 31 | + }, |
| 32 | + { |
| 33 | + "cell_type": "code", |
| 34 | + "execution_count": null, |
| 35 | + "metadata": {}, |
| 36 | + "outputs": [], |
| 37 | + "source": [ |
| 38 | + "def getSparkSessionInstance(sparkConf):\n", |
| 39 | + " \"\"\"Spark Streaming Programming Guide's recommended method \n", |
| 40 | + " for getting an existing SparkSession or creating a new one.\"\"\"\n", |
| 41 | + " if (\"sparkSessionSingletonInstance\" not in globals()):\n", |
| 42 | + " globals()[\"sparkSessionSingletonInstance\"] = SparkSession \\\n", |
| 43 | + " .builder \\\n", |
| 44 | + " .config(conf=sparkConf) \\\n", |
| 45 | + " .getOrCreate()\n", |
| 46 | + " return globals()[\"sparkSessionSingletonInstance\"]" |
| 47 | + ] |
| 48 | + }, |
| 49 | + { |
| 50 | + "cell_type": "markdown", |
| 51 | + "metadata": {}, |
| 52 | + "source": [ |
| 53 | + "Function to display a Seaborn barplot based on the Spark DataFrame it receives. " |
| 54 | + ] |
| 55 | + }, |
| 56 | + { |
| 57 | + "cell_type": "code", |
| 58 | + "execution_count": null, |
| 59 | + "metadata": {}, |
| 60 | + "outputs": [], |
| 61 | + "source": [ |
| 62 | + "def display_barplot(spark_df, x, y, time, scale=2.0, size=(16, 9)):\n", |
| 63 | + " \"\"\"Displays a Spark DataFrame's contents as a bar plot.\"\"\"\n", |
| 64 | + " df = spark_df.toPandas()\n", |
| 65 | + " \n", |
| 66 | + " # remove prior graph when new one is ready to display\n", |
| 67 | + " display.clear_output(wait=True) \n", |
| 68 | + " print(f'TIME: {time}')\n", |
| 69 | + " \n", |
| 70 | + " # create and configure a Figure containing a Seaborn barplot \n", |
| 71 | + " plt.figure(figsize=size)\n", |
| 72 | + " sns.set(font_scale=scale)\n", |
| 73 | + " barplot = sns.barplot(data=df, x=x, y=y, \n", |
| 74 | + " palette=sns.color_palette('cool', 20))\n", |
| 75 | + " \n", |
| 76 | + " # rotate the x-axis labels 90 degrees for readability\n", |
| 77 | + " for item in barplot.get_xticklabels():\n", |
| 78 | + " item.set_rotation(90)\n", |
| 79 | + " \n", |
| 80 | + " plt.tight_layout()\n", |
| 81 | + " plt.show()" |
| 82 | + ] |
| 83 | + }, |
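| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "The x-tick rotation idiom in display_barplot works on any Matplotlib Axes, not just Seaborn's. A quick standalone check with made-up values (illustrative only, not part of the app):" |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# illustrative only: the same rotation trick on a plain Matplotlib Axes\n", |
| | + "fig, ax = plt.subplots()\n", |
| | + "ax.bar(['#python', '#spark'], [3, 1])\n", |
| | + "for item in ax.get_xticklabels():\n", |
| | + "    item.set_rotation(90)\n", |
| | + "plt.show()" |
| | + ] |
| | + }, |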
| 84 | + { |
| 85 | + "cell_type": "markdown", |
| 86 | + "metadata": {}, |
| 87 | + "source": [ |
| 88 | + "Function count_tags is called for every RDD to summarize the hashtag counts in that RDD, add them to the existing totals, then display an updated top-20 barplot." |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "cell_type": "code", |
| 93 | + "execution_count": null, |
| 94 | + "metadata": {}, |
| 95 | + "outputs": [], |
| 96 | + "source": [ |
| 97 | + "def count_tags(time, rdd):\n", |
| 98 | + " \"\"\"Count hashtags and display top-20 in descending order.\"\"\"\n", |
| 99 | + " try:\n", |
| 100 | + " # get SparkSession\n", |
| 101 | + " spark = getSparkSessionInstance(rdd.context.getConf()) \n", |
| 102 | + " \n", |
| 103 | + " # map hashtag string-count tuples to Rows \n", |
| 104 | + " rows = rdd.map(\n", |
| 105 | + " lambda tag: Row(hashtag=tag[0], total=tag[1])) \n", |
| 106 | + " \n", |
| 107 | + " # create a DataFrame from the Row objects\n", |
| 108 | + " hashtags_df = spark.createDataFrame(rows)\n", |
| 109 | + "\n", |
| 110 | + " # create a temporary table view for use with Spark SQL\n", |
| 111 | + " hashtags_df.createOrReplaceTempView('hashtags')\n", |
| 112 | + " \n", |
| 113 | + " # use Spark SQL to get the top 20 hashtags in descending order\n", |
| 114 | + " top20_df = spark.sql(\n", |
| 115 | + " \"\"\"select hashtag, total \n", |
| 116 | + " from hashtags \n", |
| 117 | + " order by total desc, hashtag asc \n", |
| 118 | + " limit 20\"\"\")\n", |
| 119 | + " display_barplot(top20_df, x='hashtag', y='total', time=time)\n", |
| 120 | + " except Exception as e:\n", |
| 121 | + " print(f'Exception: {e}')\n" |
| 122 | + ] |
| 123 | + }, |
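| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "The Row mapping in count_tags can be tried on its own, since Row objects work without a running SparkSession. A small sketch with made-up hashtag-count tuples (illustrative only, not part of the original app):" |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# illustrative only: build Rows from sample (hashtag, count) tuples\n", |
| | + "sample = [('#python', 3), ('#spark', 1)]\n", |
| | + "rows = [Row(hashtag=tag[0], total=tag[1]) for tag in sample]\n", |
| | + "rows[0].hashtag, rows[0].total  # fields are accessible by name" |
| | + ] |
| | + }, |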
| 124 | + { |
| 125 | + "cell_type": "markdown", |
| 126 | + "metadata": {}, |
| 127 | + "source": [ |
| 128 | + "Main applications code sets up Spark streaming to read text from the `starttweetstream.py` script on localhost port 9876 and specifies how to process the tweets." |
| 129 | + ] |
| 130 | + }, |
| 131 | + { |
| 132 | + "cell_type": "code", |
| 133 | + "execution_count": null, |
| 134 | + "metadata": {}, |
| 135 | + "outputs": [], |
| 136 | + "source": [ |
| 137 | + "sc = SparkContext()" |
| 138 | + ] |
| 139 | + }, |
| 140 | + { |
| 141 | + "cell_type": "code", |
| 142 | + "execution_count": null, |
| 143 | + "metadata": {}, |
| 144 | + "outputs": [], |
| 145 | + "source": [ |
| 146 | + "ssc = StreamingContext(sc, 10)" |
| 147 | + ] |
| 148 | + }, |
| 149 | + { |
| 150 | + "cell_type": "code", |
| 151 | + "execution_count": null, |
| 152 | + "metadata": {}, |
| 153 | + "outputs": [], |
| 154 | + "source": [ |
| 155 | + "ssc.checkpoint('hashtagsummarizer_checkpoint') " |
| 156 | + ] |
| 157 | + }, |
| 158 | + { |
| 159 | + "cell_type": "code", |
| 160 | + "execution_count": null, |
| 161 | + "metadata": {}, |
| 162 | + "outputs": [], |
| 163 | + "source": [ |
| 164 | + "stream = ssc.socketTextStream('localhost', 9876)" |
| 165 | + ] |
| 166 | + }, |
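| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "If `starttweetstream.py` isn't handy, any process that writes newline-terminated text to localhost port 9876 can feed the stream. A minimal stand-in sketch (hypothetical, not from the book; run it in a separate terminal, because accept and the send loop block):\n", |
| | + "```python\n", |
| | + "import itertools, socket, time\n", |
| | + "server = socket.socket()\n", |
| | + "server.bind(('localhost', 9876))\n", |
| | + "server.listen(1)\n", |
| | + "conn, _ = server.accept()  # wait for socketTextStream to connect\n", |
| | + "for line in itertools.cycle(['#python #spark', '#bigdata', '#python']):\n", |
| | + "    conn.sendall((line + '\\n').encode())\n", |
| | + "    time.sleep(1)\n", |
| | + "```" |
| | + ] |
| | + }, |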
| 167 | + { |
| 168 | + "cell_type": "code", |
| 169 | + "execution_count": null, |
| 170 | + "metadata": {}, |
| 171 | + "outputs": [], |
| 172 | + "source": [ |
| 173 | + "tokenized = stream.flatMap(lambda line: line.split())" |
| 174 | + ] |
| 175 | + }, |
| 176 | + { |
| 177 | + "cell_type": "code", |
| 178 | + "execution_count": null, |
| 179 | + "metadata": {}, |
| 180 | + "outputs": [], |
| 181 | + "source": [ |
| 182 | + "mapped = tokenized.map(lambda hashtag: (hashtag, 1))" |
| 183 | + ] |
| 184 | + }, |
| 185 | + { |
| 186 | + "cell_type": "code", |
| 187 | + "execution_count": null, |
| 188 | + "metadata": {}, |
| 189 | + "outputs": [], |
| 190 | + "source": [ |
| 191 | + "hashtag_counts = mapped.updateStateByKey(\n", |
| 192 | + " lambda counts, prior_total: sum(counts) + (prior_total or 0)) " |
| 193 | + ] |
| 194 | + }, |
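| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "The update function's behavior is easy to check in plain Python. For each hashtag, Spark passes the list of new one-counts from the current batch plus the prior running total, which is None the first time the hashtag appears:" |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# the same lambda as above, evaluated outside Spark\n", |
| | + "update = lambda counts, prior_total: sum(counts) + (prior_total or 0)\n", |
| | + "print(update([1, 1, 1], None))  # first batch: 3\n", |
| | + "print(update([1, 1], 3))  # later batch: 5" |
| | + ] |
| | + }, |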
| 195 | + { |
| 196 | + "cell_type": "code", |
| 197 | + "execution_count": null, |
| 198 | + "metadata": {}, |
| 199 | + "outputs": [], |
| 200 | + "source": [ |
| 201 | + "hashtag_counts.foreachRDD(count_tags)" |
| 202 | + ] |
| 203 | + }, |
| 204 | + { |
| 205 | + "cell_type": "code", |
| 206 | + "execution_count": null, |
| 207 | + "metadata": {}, |
| 208 | + "outputs": [], |
| 209 | + "source": [ |
| 210 | + "ssc.start() # start the Spark streaming" |
| 211 | + ] |
| 212 | + }, |
| 213 | + { |
| 214 | + "cell_type": "code", |
| 215 | + "execution_count": null, |
| 216 | + "metadata": {}, |
| 217 | + "outputs": [], |
| 218 | + "source": [ |
| 219 | + "#ssc.awaitTermination() # wait for the streaming to finish" |
| 220 | + ] |
| 221 | + }, |
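| | + { |
| | + "cell_type": "markdown", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "When you're done, stop the streaming cleanly. A sketch using PySpark's StreamingContext.stop (note that stopGraceFully is that parameter's actual spelling in PySpark), left commented out like awaitTermination above:" |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "#ssc.stop(stopSparkContext=True, stopGraceFully=True)  # stop streaming and the SparkContext" |
| | + ] |
| | + }, |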
| 222 | + { |
| 223 | + "cell_type": "code", |
| 224 | + "execution_count": null, |
| 225 | + "metadata": {}, |
| 226 | + "outputs": [], |
| 227 | + "source": [ |
| 228 | + "##########################################################################\n", |
| 229 | + "# (C) Copyright 2019 by Deitel & Associates, Inc. and #\n", |
| 230 | + "# Pearson Education, Inc. All Rights Reserved. #\n", |
| 231 | + "# #\n", |
| 232 | + "# DISCLAIMER: The authors and publisher of this book have used their #\n", |
| 233 | + "# best efforts in preparing the book. These efforts include the #\n", |
| 234 | + "# development, research, and testing of the theories and programs #\n", |
| 235 | + "# to determine their effectiveness. The authors and publisher make #\n", |
| 236 | + "# no warranty of any kind, expressed or implied, with regard to these #\n", |
| 237 | + "# programs or to the documentation contained in these books. The authors #\n", |
| 238 | + "# and publisher shall not be liable in any event for incidental or #\n", |
| 239 | + "# consequential damages in connection with, or arising out of, the #\n", |
| 240 | + "# furnishing, performance, or use of these programs. #\n", |
| 241 | + "##########################################################################" |
| 242 | + ] |
| 243 | + } |
| 244 | + ], |
| 245 | + "metadata": { |
| 246 | + "kernelspec": { |
| 247 | + "display_name": "Python 3", |
| 248 | + "language": "python", |
| 249 | + "name": "python3" |
| 250 | + }, |
| 251 | + "language_info": { |
| 252 | + "codemirror_mode": { |
| 253 | + "name": "ipython", |
| 254 | + "version": 3 |
| 255 | + }, |
| 256 | + "file_extension": ".py", |
| 257 | + "mimetype": "text/x-python", |
| 258 | + "name": "python", |
| 259 | + "nbconvert_exporter": "python", |
| 260 | + "pygments_lexer": "ipython3", |
| 261 | + "version": "3.6.7" |
| 262 | + } |
| 263 | + }, |
| 264 | + "nbformat": 4, |
| 265 | + "nbformat_minor": 2 |
| 266 | +} |