Skip to content

Commit 2d3d684

Browse files
committed
Added missing examples for lessons 14-16
1 parent b85237e commit 2d3d684

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+4982
-0
lines changed

examples/ch02/fig02_01.py

100755100644
File mode changed.

examples/ch02/fig02_02.py

100755100644
File mode changed.

examples/ch03/fig03_01.py

100755100644
File mode changed.

examples/ch03/fig03_02.py

100755100644
File mode changed.

examples/ch04/fig04_01.py

100755100644
File mode changed.

examples/ch04/fig04_02.py

100755100644
File mode changed.

examples/ch04/fig04_03.py

100755100644
File mode changed.

examples/ch05/RollDie.py

100755100644
File mode changed.

examples/ch05/fig05_01.py

100755100644
File mode changed.

examples/ch06/RollDieDynamic.py

100755100644
File mode changed.

examples/ch06/fig06_01.py

100755100644
File mode changed.

examples/ch06/fig06_02.py

100755100644
File mode changed.

examples/ch09/dividebyzero.py

100755100644
File mode changed.

examples/ch10/account.py

100755100644
File mode changed.

examples/ch10/accountdoctest.py

100755100644
File mode changed.

examples/ch10/ave_hi_austin_jan_1895-2018.csv

100755100644
File mode changed.

examples/ch10/ave_hi_la_jan_1895-2018.csv

100755100644
File mode changed.

examples/ch10/ave_hi_nyc2_jan_1895-2018.csv

100755100644
File mode changed.

examples/ch10/ave_hi_nyc_jan_1895-2018.csv

100755100644
File mode changed.

examples/ch10/card.py

100755100644
File mode changed.

examples/ch10/commissionemployee.py

100755100644
File mode changed.

examples/ch10/complexnumber.py

100755100644
File mode changed.

examples/ch10/complexnumber2.py

100755100644
File mode changed.

examples/ch10/deck.py

100755100644
File mode changed.

examples/ch10/deck2.py

100755100644
File mode changed.

examples/ch10/private.py

100755100644
File mode changed.

examples/ch10/salariedcommissionemployee.py

100755100644
File mode changed.

examples/ch10/salariedemployee.py

100755100644
File mode changed.

examples/ch10/timewithproperties.py

100755100644
File mode changed.

examples/ch11/mask_circle.png

100755100644
File mode changed.

examples/ch11/mask_heart.png

100755100644
File mode changed.

examples/ch11/mask_oval.png

100755100644
File mode changed.

examples/ch11/mask_star.png

100755100644
File mode changed.

examples/ch12/keys.py

100755100644
File mode changed.

examples/ch12/tweetutilities.py

100755100644
File mode changed.

examples/ch13/keys.py

100755100644
File mode changed.

examples/ch14/ave_hi_nyc_jan_1895-2018.csv

100755100644
File mode changed.

examples/ch15/IMDB_RNN.ipynb

+847
Large diffs are not rendered by default.

examples/ch15/MNIST_CNN.ipynb

+772
Large diffs are not rendered by default.

examples/ch15/MNIST_CNN_TensorBoard.ipynb

+787
Large diffs are not rendered by default.
164 KB
750 KB

examples/ch15/images/PyFPCover.png

332 KB

examples/ch15/images/course_image.jpg

32.3 KB
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env python3
2+
# length_mapper.py
3+
"""Maps lines of text to key-value pairs of word lengths and 1."""
4+
import sys
5+
6+
def tokenize_input():
    """Yield each line of standard input as a list of whitespace-separated words."""
    return (text.split() for text in sys.stdin)
10+
11+
# Hadoop-streaming mapper output: for every word read from standard input,
# emit the word's length and the count 1, separated by a tab.
for words in tokenize_input():
    for token in words:
        print(f'{len(token)}\t1')
16+
17+
##########################################################################
18+
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
19+
# Pearson Education, Inc. All Rights Reserved. #
20+
# #
21+
# DISCLAIMER: The authors and publisher of this book have used their #
22+
# best efforts in preparing the book. These efforts include the #
23+
# development, research, and testing of the theories and programs #
24+
# to determine their effectiveness. The authors and publisher make #
25+
# no warranty of any kind, expressed or implied, with regard to these #
26+
# programs or to the documentation contained in these books. The authors #
27+
# and publisher shall not be liable in any event for incidental or #
28+
# consequential damages in connection with, or arising out of, the #
29+
# furnishing, performance, or use of these programs. #
30+
##########################################################################
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python3
2+
# length_reducer.py
3+
"""Counts the number of words with each length."""
4+
import sys
5+
from itertools import groupby
6+
from operator import itemgetter
7+
8+
def tokenize_input():
    """Yield [key, value] lists parsed from tab-separated lines of standard input."""
    for raw_line in sys.stdin:
        fields = raw_line.strip().split('\t')
        yield fields
12+
13+
# Hadoop-streaming reducer output: the shuffle phase delivers the mapper's
# pairs sorted by key, so groupby sees each word length as one contiguous
# run. Emit one "length<TAB>total" line per word length.
for length_key, pairs in groupby(tokenize_input(), key=itemgetter(0)):
    try:
        group_total = sum(int(value) for _, value in pairs)
    except ValueError:
        pass  # skip a group whose count field is not an integer
    else:
        print(length_key + '\t' + str(group_total))
20+
21+
##########################################################################
22+
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
23+
# Pearson Education, Inc. All Rights Reserved. #
24+
# #
25+
# DISCLAIMER: The authors and publisher of this book have used their #
26+
# best efforts in preparing the book. These efforts include the #
27+
# development, research, and testing of the theories and programs #
28+
# to determine their effectiveness. The authors and publisher make #
29+
# no warranty of any kind, expressed or implied, with regard to these #
30+
# programs or to the documentation contained in these books. The authors #
31+
# and publisher shall not be liable in any event for incidental or #
32+
# consequential damages in connection with, or arising out of, the #
33+
# furnishing, performance, or use of these programs. #
34+
##########################################################################
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Launch the word-length MapReduce job via Hadoop streaming: ships
# length_mapper.py and length_reducer.py to the cluster nodes (-files),
# sorts the intermediate keys numerically (KeyFieldBasedComparator with
# the -n option), reads RomeoAndJuliet.txt and writes the results to
# /example/wordlengthsoutput.
yarn jar /usr/hdp/current/hadoop-mapreduce-client/hadoop-streaming.jar -D mapred.output.key.comparator.class=org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -D mapred.text.key.comparator.options=-n -files length_mapper.py,length_reducer.py -mapper length_mapper.py -reducer length_reducer.py -input /example/data/RomeoAndJuliet.txt -output /example/wordlengthsoutput

examples/ch16/README.txt

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Unlike the prior chapters, the snippet files and notebook files
2+
are located in the example folders mentioned in each section.
3+
4+
Some examples were implemented only in notebooks because they
5+
execute in cloud-based cluster environments.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"Import this app's dependencies."
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"from pyspark import SparkContext\n",
17+
"from pyspark.streaming import StreamingContext\n",
18+
"from pyspark.sql import Row, SparkSession\n",
19+
"from IPython import display\n",
20+
"import matplotlib.pyplot as plt\n",
21+
"import seaborn as sns\n",
22+
"%matplotlib inline "
23+
]
24+
},
25+
{
26+
"cell_type": "markdown",
27+
"metadata": {},
28+
"source": [
29+
"From the PySpark Streaming Programming Guide at https://spark.apache.org/docs/latest/streaming-programming-guide.html#dataframe-and-sql-operations. This is the recommended way for each cluster node to get the SparkSession."
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"def getSparkSessionInstance(sparkConf):\n",
39+
" \"\"\"Spark Streaming Programming Guide's recommended method \n",
40+
" for getting an existing SparkSession or creating a new one.\"\"\"\n",
41+
" if (\"sparkSessionSingletonInstance\" not in globals()):\n",
42+
" globals()[\"sparkSessionSingletonInstance\"] = SparkSession \\\n",
43+
" .builder \\\n",
44+
" .config(conf=sparkConf) \\\n",
45+
" .getOrCreate()\n",
46+
" return globals()[\"sparkSessionSingletonInstance\"]"
47+
]
48+
},
49+
{
50+
"cell_type": "markdown",
51+
"metadata": {},
52+
"source": [
53+
"Function to display a Seaborn barplot based on the Spark DataFrame it receives. "
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": null,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"def display_barplot(spark_df, x, y, time, scale=2.0, size=(16, 9)):\n",
63+
" \"\"\"Displays a Spark DataFrame's contents as a bar plot.\"\"\"\n",
64+
" df = spark_df.toPandas()\n",
65+
" \n",
66+
" # remove prior graph when new one is ready to display\n",
67+
" display.clear_output(wait=True) \n",
68+
" print(f'TIME: {time}')\n",
69+
" \n",
70+
" # create and configure a Figure containing a Seaborn barplot \n",
71+
" plt.figure(figsize=size)\n",
72+
" sns.set(font_scale=scale)\n",
73+
" barplot = sns.barplot(data=df, x=x, y=y, \n",
74+
" palette=sns.color_palette('cool', 20))\n",
75+
" \n",
76+
" # rotate the x-axis labels 90 degrees for readability\n",
77+
" for item in barplot.get_xticklabels():\n",
78+
" item.set_rotation(90)\n",
79+
" \n",
80+
" plt.tight_layout()\n",
81+
" plt.show()"
82+
]
83+
},
84+
{
85+
"cell_type": "markdown",
86+
"metadata": {},
87+
"source": [
88+
"Function count_tags is called for every RDD to summarize the hashtag counts in that RDD, add them to the existing totals, then display an updated top-20 barplot."
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"def count_tags(time, rdd):\n",
98+
" \"\"\"Count hashtags and display top-20 in descending order.\"\"\"\n",
99+
" try:\n",
100+
" # get SparkSession\n",
101+
" spark = getSparkSessionInstance(rdd.context.getConf()) \n",
102+
" \n",
103+
" # map hashtag string-count tuples to Rows \n",
104+
" rows = rdd.map(\n",
105+
" lambda tag: Row(hashtag=tag[0], total=tag[1])) \n",
106+
" \n",
107+
" # create a DataFrame from the Row objects\n",
108+
" hashtags_df = spark.createDataFrame(rows)\n",
109+
"\n",
110+
" # create a temporary table view for use with Spark SQL\n",
111+
" hashtags_df.createOrReplaceTempView('hashtags')\n",
112+
" \n",
113+
" # use Spark SQL to get the top 20 hashtags in descending order\n",
114+
" top20_df = spark.sql(\n",
115+
" \"\"\"select hashtag, total \n",
116+
" from hashtags \n",
117+
" order by total desc, hashtag asc \n",
118+
" limit 20\"\"\")\n",
119+
" display_barplot(top20_df, x='hashtag', y='total', time=time)\n",
120+
" except Exception as e:\n",
121+
" print(f'Exception: {e}')\n"
122+
]
123+
},
124+
{
125+
"cell_type": "markdown",
126+
"metadata": {},
127+
"source": [
128+
"Main applications code sets up Spark streaming to read text from the `starttweetstream.py` script on localhost port 9876 and specifies how to process the tweets."
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": null,
134+
"metadata": {},
135+
"outputs": [],
136+
"source": [
137+
"sc = SparkContext()"
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"ssc = StreamingContext(sc, 10)"
147+
]
148+
},
149+
{
150+
"cell_type": "code",
151+
"execution_count": null,
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"ssc.checkpoint('hashtagsummarizer_checkpoint') "
156+
]
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"metadata": {},
162+
"outputs": [],
163+
"source": [
164+
"stream = ssc.socketTextStream('localhost', 9876)"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"metadata": {},
171+
"outputs": [],
172+
"source": [
173+
"tokenized = stream.flatMap(lambda line: line.split())"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": null,
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"mapped = tokenized.map(lambda hashtag: (hashtag, 1))"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": null,
188+
"metadata": {},
189+
"outputs": [],
190+
"source": [
191+
"hashtag_counts = mapped.updateStateByKey(\n",
192+
" lambda counts, prior_total: sum(counts) + (prior_total or 0)) "
193+
]
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": null,
198+
"metadata": {},
199+
"outputs": [],
200+
"source": [
201+
"hashtag_counts.foreachRDD(count_tags)"
202+
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": null,
207+
"metadata": {},
208+
"outputs": [],
209+
"source": [
210+
"ssc.start() # start the Spark streaming"
211+
]
212+
},
213+
{
214+
"cell_type": "code",
215+
"execution_count": null,
216+
"metadata": {},
217+
"outputs": [],
218+
"source": [
219+
"#ssc.awaitTermination() # wait for the streaming to finish"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": null,
225+
"metadata": {},
226+
"outputs": [],
227+
"source": [
228+
"##########################################################################\n",
229+
"# (C) Copyright 2019 by Deitel & Associates, Inc. and #\n",
230+
"# Pearson Education, Inc. All Rights Reserved. #\n",
231+
"# #\n",
232+
"# DISCLAIMER: The authors and publisher of this book have used their #\n",
233+
"# best efforts in preparing the book. These efforts include the #\n",
234+
"# development, research, and testing of the theories and programs #\n",
235+
"# to determine their effectiveness. The authors and publisher make #\n",
236+
"# no warranty of any kind, expressed or implied, with regard to these #\n",
237+
"# programs or to the documentation contained in these books. The authors #\n",
238+
"# and publisher shall not be liable in any event for incidental or #\n",
239+
"# consequential damages in connection with, or arising out of, the #\n",
240+
"# furnishing, performance, or use of these programs. #\n",
241+
"##########################################################################"
242+
]
243+
}
244+
],
245+
"metadata": {
246+
"kernelspec": {
247+
"display_name": "Python 3",
248+
"language": "python",
249+
"name": "python3"
250+
},
251+
"language_info": {
252+
"codemirror_mode": {
253+
"name": "ipython",
254+
"version": 3
255+
},
256+
"file_extension": ".py",
257+
"mimetype": "text/x-python",
258+
"name": "python",
259+
"nbconvert_exporter": "python",
260+
"pygments_lexer": "ipython3",
261+
"version": "3.6.7"
262+
}
263+
},
264+
"nbformat": 4,
265+
"nbformat_minor": 2
266+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Twitter API credentials — replace each placeholder with your own values
# before running the examples that import this module.
consumer_key = 'YourConsumerKey'
consumer_secret = 'YourConsumerSecret'
access_token = 'YourAccessToken'
access_token_secret = 'YourAccessTokenSecret'

# MapQuest API key — presumably used by the geocoding examples; confirm
# against the callers that import this module.
mapquest_key = 'YourAPIKey'

0 commit comments

Comments
 (0)