Commit b746515

changes for September 2023 class
1 parent 3c1f5d9 commit b746515

10 files changed, +253 −279 lines

Dockerfile copy

+48
@@ -0,0 +1,48 @@
+# Based on the Dockerfiles from the Jupyter Development Team which
+# are Copyright (c) Jupyter Development Team and distributed under
+# the terms of the Modified BSD License.
+ARG OWNER=jupyter
+ARG BASE_CONTAINER=$OWNER/pyspark-notebook
+FROM $BASE_CONTAINER
+
+LABEL maintainer="Paul Deitel <paul@deitel.com>"
+
+# Fix: https://github.com/hadolint/hadolint/wiki/DL4006
+# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN mamba install --yes \
+    'dnspython' \
+    'folium' \
+    'geopy' \
+    'imageio' \
+    'nltk' \
+    'pymongo' \
+    'scikit-learn' \
+    'spacy' \
+    'tweepy'
+
+RUN pip install --upgrade \
+    'tensorflow' \
+    'openai' \
+    'beautifulsoup4' \
+    'deepl' \
+    'mastodon.py' \
+    'better_profanity' \
+    'tweet-preprocessor' \
+    'ibm-watson' \
+    'pubnub' \
+    'textblob' \
+    'wordcloud' \
+    'dweepy'
+
+# download data required by textblob and spacy
+RUN python -m textblob.download_corpora && \
+    python -m spacy download en_core_web_sm && \
+    python -m spacy download en_core_web_md && \
+    python -m spacy download en_core_web_lg
+
+# clean up
+RUN mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"

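A quick way to confirm the rebuilt image is usable for the class is to import each newly installed library from a notebook running in the container. This is only a sanity-check sketch, not part of the commit; the package-to-module name mapping (for example, scikit-learn imports as sklearn, mastodon.py as mastodon, tweet-preprocessor as preprocessor) is an assumption based on the usual conventions for these libraries.

```python
# Sanity-check sketch (not part of the commit): confirm that the libraries
# installed by the Dockerfile above can be imported inside the container.
import importlib

# import names assumed from the usual package -> module conventions
modules = [
    'nltk', 'spacy', 'sklearn', 'folium', 'geopy', 'imageio', 'pymongo',
    'tweepy', 'tensorflow', 'openai', 'bs4', 'deepl', 'mastodon',
    'better_profanity', 'preprocessor', 'ibm_watson', 'pubnub',
    'textblob', 'wordcloud', 'dweepy'
]

for name in modules:
    try:
        importlib.import_module(name)
        print(f'{name}: OK')
    except ImportError as error:
        print(f'{name}: FAILED ({error})')
```
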
ch12_Mastodon/ch12_Mastodon.ipynb

+37 −10
@@ -736,7 +736,18 @@
 },
 "outputs": [],
 "source": [
-"for account in sorted(accounts[:3], key=lambda acct: acct.followers_count, reverse=True):\n",
+"sorted_accounts = sorted(accounts, key=lambda acct: acct.followers_count, reverse=True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": []
+},
+"outputs": [],
+"source": [
+"for account in sorted_accounts[:3]:\n",
 " print('username: ', account.username)\n",
 " print('id: ', account.id)\n",
 " print('url: ', account.url)\n",
@@ -1124,7 +1135,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "toot_text = '<p style=\"padding-left: 3em\">A sample fake toot with a URL https://nasa.gov</p>'"
@@ -1140,7 +1153,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "from bs4 import BeautifulSoup"
@@ -1149,7 +1164,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "soup = BeautifulSoup(toot_text, 'html.parser') "
@@ -1158,7 +1175,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "plain_text = soup.get_text() # remove all HTML/CSS tags and commands"
@@ -1196,7 +1215,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "p.set_options(p.OPT.URL)"
@@ -1487,7 +1508,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "limit = 10"
@@ -1503,7 +1526,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "sentiment_dict = {'positive': 0, 'neutral': 0, 'negative': 0}"
@@ -1774,7 +1799,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "bad_locations "
@@ -2201,7 +2228,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.11"
+"version": "3.11.5"
 }
 },
 "nbformat": 4,

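The notebook change above does more than split one cell into two: the original code sorted only the first three accounts returned (`accounts[:3]`), while the new code sorts the full list first and then shows the top three by follower count. Below is a minimal sketch of the corrected pattern, using hypothetical placeholder accounts rather than live Mastodon.py search results.

```python
# Sketch of the corrected sorting pattern; the Account records below are
# hypothetical stand-ins for objects returned by a Mastodon.py account search.
from collections import namedtuple

Account = namedtuple('Account', ['username', 'id', 'url', 'followers_count'])

accounts = [
    Account('astro_fan', 1, 'https://example.social/@astro_fan', 120),
    Account('nasa_news', 2, 'https://example.social/@nasa_news', 5400),
    Account('rocketry', 3, 'https://example.social/@rocketry', 830),
]

# sort ALL accounts by follower count, descending ...
sorted_accounts = sorted(
    accounts, key=lambda acct: acct.followers_count, reverse=True)

# ... then display only the top three
for account in sorted_accounts[:3]:
    print('username:', account.username)
    print('id:      ', account.id)
    print('url:     ', account.url)
```
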
ch13/Ch13.ipynb

+53 −40
@@ -176,18 +176,9 @@
 "source": [
 "### Modules We’ll Need for Audio Recording and Playback (2 of 2)\n",
 "```\n",
-"pip install pyaudio # Windows Users conda install pyaudio\n",
-"pip install pydub \n",
-"```\n",
-"\n",
-"These are also installable now with conda, which will auto install `portaudio` if necessary:\n",
-"```\n",
-"conda install pyaudio \n",
-"conda install pydub \n",
-"```\n",
-"\n",
-"**Mac users** might first need to execute\n",
-">`conda install -c conda-forge portaudio`\n"
+"pip install sounddevice \n",
+"pip install simpleaudio \n",
+"```"
 ]
 },
 {
@@ -322,14 +313,14 @@
 "### Other Imported Modules\n",
 "```python\n",
 "import keys # contains your API keys for accessing Watson services\n",
-"import pyaudio # used to record from mic\n",
-"import pydub # used to load a WAV file\n",
-"import pydub.playback # used to play a WAV file\n",
-"import wave # used to save a WAV file\n",
+"import wave \n",
+"import simpleaudio as sa\n",
+"import sounddevice as sd\n",
+"from scipy.io.wavfile import write\n",
 "```\n",
 "\n",
-"* **`pyaudio`** for **recording audio** \n",
-"* **`pydub`** and **`pydub.playback`** to **load and play audio files**\n",
+"* **`sounddevice`** for **recording audio** \n",
+"* **`simpleaudio`** to **load and play audio files**\n",
 "* **`wave`** to save **WAV (Waveform Audio File Format) files**"
 ]
 },
@@ -370,13 +361,12 @@
 "### Main Program: Function `run_translator` (2 of 6)\n",
 "* **Step 2**: Call **`speech_to_text`**\n",
 " * **Speech to Text service** transcribes text using **predefined models**\n",
-" * Most languages have **broadband** (**>=16kHZ**) and **narrowband** (**<16kHZ**) models (based on **audio quality**)\n",
-" * App **captures** audio at **44.1 kHZ**, so we use **`'en-US_BroadbandModel'`**\n",
+" * They now have general multimedia models and models optimized for telephone audio \n",
 "\n",
 "```python\n",
 " # Step 2: Transcribe the English speech to English text\n",
 " english = speech_to_text(\n",
-" file_name='english.wav', model_id='en-US_BroadbandModel')\n",
+" file_name='english.wav', model_id='en-US_Multimedia')\n",
 " print('English:', english) # display transcription\n",
 "```"
 ]
@@ -415,11 +405,12 @@
 "metadata": {},
 "source": [
 "### Main Program: Function `run_translator` (4 of 6)\n",
-"* **Voice `'es-US_SofiaVoice'`** is for Spanish as spoken in the U.S.\n",
+"* **Voice `'es-US_SofiaV3Voice'`** is for Spanish as spoken in the U.S.\n",
 "\n",
 "```python \n",
 " # Step 4: Synthesize the Spanish text into Spanish speech \n",
-" text_to_speech(text_to_speak=spanish, voice_to_use='es-US_SofiaVoice',\n",
+" text_to_speech(text_to_speak=spanish, \n",
+" voice_to_use='es-US_SofiaV3Voice',\n",
 " file_name='spanish.wav')\n",
 "```"
 ]
@@ -458,9 +449,9 @@
 "### Main Program: Function `run_translator` (6 of 6)\n",
 "* **Steps 6–10** repeat previous steps for **Spanish speech to English speech**: \n",
 " * **Step 6** **records** the Spanish audio\n",
-" * **Step 7** **transcribes** the **Spanish audio** to Spanish text using predefined model **`'es-ES_BroadbandModel'`**\n",
+" * **Step 7** **transcribes** the **Spanish audio** to Spanish text using predefined model **`'es-ES_Multimedia'`**\n",
 " * **Step 8** **translates** the **Spanish text** to English text using predefined model **`'es-en'`** (Spanish-to-English)\n",
-" * **Step 9** **creates** the **English audio** using **`'en-US_AllisonVoice'`**\n",
+" * **Step 9** **creates** the **English audio** using **`'en-US_AllisonV3Voice'`**\n",
 " * **Step 10** **plays** the English **audio**"
 ]
 },
@@ -475,7 +466,8 @@
 "\n",
 " # Step 7: Transcribe the Spanish speech to Spanish text\n",
 " spanish = speech_to_text(\n",
-" file_name='spanishresponse.wav', model_id='es-ES_BroadbandModel')\n",
+" file_name='spanishresponse.wav', \n",
+" model_id='es-ES_Multimedia')\n",
 " print('Spanish response:', spanish)\n",
 "\n",
 " # Step 8: Translate the Spanish text to English text\n",
@@ -484,7 +476,7 @@
 "\n",
 " # Step 9: Synthesize the English text to English speech\n",
 " text_to_speech(text_to_speak=english,\n",
-" voice_to_use='en-US_AllisonVoice',\n",
+" voice_to_use='en-US_AllisonV3Voice',\n",
 " file_name='englishresponse.wav')\n",
 "\n",
 " # Step 10: Play the English audio\n",
@@ -507,10 +499,8 @@
 "```python\n",
 "def speech_to_text(file_name, model_id):\n",
 " \"\"\"Use Watson Speech to Text to convert audio file to text.\"\"\"\n",
-" # create Watson Speech to Text client \n",
-" # OLD: stt = SpeechToTextV1(iam_apikey=keys.speech_to_text_key)\n",
-" authenticator = IAMAuthenticator(keys.speech_to_text_key) # *** NEW\n",
-" stt = SpeechToTextV1(authenticator=authenticator) # *** NEW\n",
+" authenticator = IAMAuthenticator(keys.speech_to_text_key) \n",
+" stt = SpeechToTextV1(authenticator=authenticator)\n",
 "```"
 ]
 },
@@ -552,7 +542,22 @@
 " * Useful when transcribing **live audio**, such as a newscast\n",
 " * [Method `recognize`’s arguments and JSON response details](https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/python.html?python#recognize-sessionless).\n",
 "* **`getResult` method** returns **JSON** containing **`transcript`**:\n",
-" ![JSON returned from SpeechToTextV1 recognize method](./ch13images/RecognizeDetailedResponse.png \"JSON returned from SpeechToTextV1 recognize method\")\n"
+"```json\n",
+"{\n",
+" \"result_index\": 0,\n",
+" \"results\": [\n",
+" {\n",
+" \"final\": true,\n",
+" \"alternatives\": [\n",
+" {\n",
+" \"transcript\": \"where is the nearest bathroom \",\n",
+" \"confidence\": 0.96\n",
+" }\n",
+" ]\n",
+" }\n",
+" ]\n",
+"}\n",
+"```"
 ]
 },
 {
@@ -627,10 +632,9 @@
 " \"\"\"Use Watson Language Translator to translate English to Spanish \n",
 " (en-es) or Spanish to English (es-en) as specified by model.\"\"\"\n",
 " # create Watson Translator client\n",
-" # OLD: language_translator = LanguageTranslatorV3(version='2018-05-01', iam_apikey=keys.translate_key)\n",
-" authenticator = IAMAuthenticator(keys.translate_key) # *** NEW\n",
+" authenticator = IAMAuthenticator(keys.translate_key) \n",
 " language_translator = LanguageTranslatorV3(version='2018-05-31',\n",
-" authenticator=authenticator) # *** NEW\n",
+" authenticator=authenticator)\n",
 "\n",
 " # perform the translation\n",
 " translated_text = language_translator.translate(\n",
@@ -650,8 +654,18 @@
 "metadata": {},
 "source": [
 "### Function `translate` Returns a **`DetailedResponse`** (4 of 4)\n",
-"* **`getResult` method** returns **JSON** containing **translation**: \n",
-" ![JSON returned from LanguageTranslatorV3 translate method](./ch13images/TranslateDetailedResponse.png \"JSON returned from LanguageTranslatorV3 translate method\")\n"
+"* **`getResult` method** returns **JSON** containing **translation** \"donde es el baño más cercano\": \n",
+"```json\n",
+"{\n",
+" \"translations\": [\n",
+" {\n",
+" \"translation\": \"donde es el ba\\u00f1o m\\u00e1s cercano \"\n",
+" }\n",
+" ],\n",
+" \"word_count\": 5,\n",
+" \"character_count\": 30\n",
+"}\n",
+"```"
 ]
 },
 {
@@ -702,8 +716,7 @@
 " \"\"\"Use Watson Text to Speech to convert text to specified voice\n",
 " and save to a WAV file.\"\"\"\n",
 " # create Text to Speech client\n",
-" # OLD: tts = TextToSpeechV1(iam_apikey=keys.text_to_speech_key)\n",
-" authenticator = IAMAuthenticator(keys.text_to_speech_key) # *** NEW\n",
+" authenticator = IAMAuthenticator(keys.text_to_speech_key)\n",
 " tts = TextToSpeechV1(authenticator=authenticator)\n",
 "\n",
 " # open file and write the synthesized audio content into the file\n",
@@ -1251,7 +1264,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.11"
+"version": "3.11.5"
 }
 },
 "nbformat": 4,

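The ch13 notebook now records with sounddevice and plays audio with simpleaudio instead of pyaudio/pydub, but the commit shows only the updated imports and narrative, not the revised record/play helpers. The sketch below shows one common way these libraries fit together; the 44.1 kHz sample rate, five-second duration, and file name are assumptions, not values taken from the diff.

```python
# Sketch only: record a short clip with sounddevice, save it as a WAV file,
# and play it back with simpleaudio. The sample rate, duration, and file name
# are assumptions, not values taken from the commit.
import sounddevice as sd
import simpleaudio as sa
from scipy.io.wavfile import write

SAMPLE_RATE = 44_100  # samples per second (assumed)
SECONDS = 5           # recording length in seconds (assumed)

def record_audio(file_name):
    """Record from the default microphone and save to a WAV file."""
    frames = sd.rec(int(SECONDS * SAMPLE_RATE),
                    samplerate=SAMPLE_RATE, channels=1, dtype='int16')
    sd.wait()  # block until the recording finishes
    write(file_name, SAMPLE_RATE, frames)  # scipy writes WAV header + samples

def play_audio(file_name):
    """Load a WAV file with simpleaudio and play it to completion."""
    wave_object = sa.WaveObject.from_wave_file(file_name)
    play_object = wave_object.play()
    play_object.wait_done()  # block until playback finishes

record_audio('english.wav')
play_audio('english.wav')
```

One convenience of this combination is that sounddevice records into a NumPy array, which scipy.io.wavfile.write can serialize directly, so no manual wave-module bookkeeping is needed for simple cases.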