# SimpleLanguageTranslator.py
"""Use IBM Watson Speech to Text, Language Translator and Text to Speech
APIs to enable English and Spanish speakers to communicate."""
from ibm_watson import SpeechToTextV1
from ibm_watson import LanguageTranslatorV3
from ibm_watson import TextToSpeechV1
import keys # contains your API keys for accessing Watson services
import pyaudio # used to record from mic
import pydub # used to load a WAV file
import pydub.playback # used to play a WAV file
import wave # used to save a WAV file
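
# The keys module imported above is assumed to be a local keys.py that
# defines the three string attributes this script reads. A minimal sketch
# (the values are placeholders, not real credentials):
#
#     speech_to_text_key = 'YOUR_SPEECH_TO_TEXT_API_KEY'
#     translate_key = 'YOUR_LANGUAGE_TRANSLATOR_API_KEY'
#     text_to_speech_key = 'YOUR_TEXT_TO_SPEECH_API_KEY'
#
# The attribute names match the references below (keys.speech_to_text_key,
# keys.translate_key and keys.text_to_speech_key).
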
def run_translator():
    """Calls the functions that interact with Watson services."""
    # Step 1: Prompt for then record English speech into an audio file
    input('Press Enter then ask your question in English')
    record_audio('english.wav')

    # Step 2: Transcribe the English speech to English text
    english = speech_to_text(
        file_name='english.wav', model_id='en-US_BroadbandModel')
    print('English:', english)

    # Step 3: Translate the English text into Spanish text
    spanish = translate(text_to_translate=english, model='en-es')
    print('Spanish:', spanish)

    # Step 4: Synthesize the Spanish text into Spanish speech
    text_to_speech(text_to_speak=spanish, voice_to_use='es-US_SofiaVoice',
        file_name='spanish.wav')

    # Step 5: Play the Spanish audio file
    play_audio(file_name='spanish.wav')

    # Step 6: Prompt for then record Spanish speech into an audio file
    input('Press Enter then speak the Spanish answer')
    record_audio('spanishresponse.wav')

    # Step 7: Transcribe the Spanish speech to Spanish text
    spanish = speech_to_text(
        file_name='spanishresponse.wav', model_id='es-ES_BroadbandModel')
    print('Spanish response:', spanish)

    # Step 8: Translate the Spanish text into English text
    english = translate(text_to_translate=spanish, model='es-en')
    print('English response:', english)

    # Step 9: Synthesize the English text into English speech
    text_to_speech(text_to_speak=english,
        voice_to_use='en-US_AllisonVoice',
        file_name='englishresponse.wav')

    # Step 10: Play the English audio
    play_audio(file_name='englishresponse.wav')

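# Running run_translator leaves four intermediate WAV files in the working
# directory -- english.wav, spanish.wav, spanishresponse.wav and
# englishresponse.wav -- each of which is overwritten on the next run.
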
def speech_to_text(file_name, model_id):
    """Use Watson Speech to Text to convert audio file to text."""
    # create Watson Speech to Text client
    stt = SpeechToTextV1(iam_apikey=keys.speech_to_text_key)

    # open the audio file
    with open(file_name, 'rb') as audio_file:
        # pass the file to Watson for transcription
        result = stt.recognize(audio=audio_file,
            content_type='audio/wav', model=model_id).get_result()

    # Get the 'results' list. This may contain intermediate and final
    # results, depending on method recognize's arguments. We asked
    # for only final results, so this list contains one element.
    results_list = result['results']

    # Get the final speech recognition result--the list's only element.
    speech_recognition_result = results_list[0]

    # Get the 'alternatives' list. This may contain multiple alternative
    # transcriptions, depending on method recognize's arguments. We did
    # not ask for alternatives, so this list contains one element.
    alternatives_list = speech_recognition_result['alternatives']

    # Get the only alternative transcription from alternatives_list.
    first_alternative = alternatives_list[0]

    # Get the 'transcript' key's value, which contains the audio's
    # text transcription.
    transcript = first_alternative['transcript']

    return transcript  # return the audio's text transcription

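# For reference, the recognize result navigated above has roughly this shape
# (the confidence value and transcript text are illustrative, not real output):
#
#     {'results': [{'alternatives': [{'confidence': 0.98,
#                                     'transcript': 'where is the bus stop '}],
#                   'final': True}],
#      'result_index': 0}
#
# speech_to_text drills into results[0] -> alternatives[0] -> 'transcript'.
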
def translate(text_to_translate, model):
    """Use Watson Language Translator to translate English to Spanish
    (en-es) or Spanish to English (es-en) as specified by model."""
    # create Watson Translator client
    language_translator = LanguageTranslatorV3(version='2018-05-01',
        iam_apikey=keys.translate_key)

    # perform the translation
    translated_text = language_translator.translate(
        text=text_to_translate, model_id=model).get_result()

    # Get 'translations' list. If method translate's text argument has
    # multiple strings, the list will have multiple entries. We passed
    # one string, so the list contains only one element.
    translations_list = translated_text['translations']

    # get translations_list's only element
    first_translation = translations_list[0]

    # get 'translation' key's value, which is the translated text
    translation = first_translation['translation']

    return translation  # return the translated string

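# For reference, the translate result navigated above has roughly this shape
# (the text and counts are illustrative):
#
#     {'translations': [{'translation': '¿dónde está la parada de autobús?'}],
#      'word_count': 6, 'character_count': 26}
#
# translate drills into translations[0] -> 'translation'.
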
def text_to_speech(text_to_speak, voice_to_use, file_name):
    """Use Watson Text to Speech to convert text to specified voice
    and save to a WAV file."""
    # create Text to Speech client
    tts = TextToSpeechV1(iam_apikey=keys.text_to_speech_key)

    # open file and write the synthesized audio content into the file
    with open(file_name, 'wb') as audio_file:
        audio_file.write(tts.synthesize(text_to_speak,
            accept='audio/wav', voice=voice_to_use).get_result().content)

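# Note: in the ibm_watson SDK version this script targets (the one that
# accepts iam_apikey= directly), synthesize(...).get_result() returns the
# underlying HTTP response object, so .content holds the raw WAV bytes --
# hence the output file above is opened in binary ('wb') mode.
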
def record_audio(file_name):
    """Use pyaudio to record 5 seconds of audio to a WAV file."""
    FRAME_RATE = 44100  # number of frames per second
    CHUNK = 1024  # number of frames read at a time
    FORMAT = pyaudio.paInt16  # each sample is a 16-bit (2-byte) integer
    CHANNELS = 2  # 2 samples per frame
    SECONDS = 5  # total recording time

    recorder = pyaudio.PyAudio()  # opens/closes audio streams

    # configure and open audio stream for recording (input=True)
    audio_stream = recorder.open(format=FORMAT, channels=CHANNELS,
        rate=FRAME_RATE, input=True, frames_per_buffer=CHUNK)

    audio_frames = []  # stores raw bytes of mic input
    print('Recording 5 seconds of audio')

    # read 5 seconds of audio in CHUNK-sized pieces
    for i in range(0, int(FRAME_RATE * SECONDS / CHUNK)):
        audio_frames.append(audio_stream.read(CHUNK))

    print('Recording complete')
    audio_stream.stop_stream()  # stop recording
    audio_stream.close()
    recorder.terminate()  # release underlying resources used by PyAudio

    # save audio_frames to a WAV file
    with wave.open(file_name, 'wb') as output_file:
        output_file.setnchannels(CHANNELS)
        output_file.setsampwidth(recorder.get_sample_size(FORMAT))
        output_file.setframerate(FRAME_RATE)
        output_file.writeframes(b''.join(audio_frames))

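# Sizing note: the loop above reads int(44100 * 5 / 1024) = 215 chunks of
# 1024 frames, i.e. 220,160 frames (~4.99 seconds of audio). Each stereo
# frame is 2 channels x 2 bytes = 4 bytes, so the WAV file's audio data
# totals about 880,640 bytes (~860 KiB).
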
def play_audio(file_name):
    """Use the pydub module (pip install pydub) to play a WAV file."""
    sound = pydub.AudioSegment.from_wav(file_name)
    pydub.playback.play(sound)

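# pydub.playback.play typically relies on an available audio backend such as
# simpleaudio, pyaudio or ffplay; if playback fails on your system, installing
# simpleaudio (pip install simpleaudio) is a common fix.
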
if __name__ == '__main__':
    run_translator()

##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
# Pearson Education, Inc. All Rights Reserved. #
# #
# DISCLAIMER: The authors and publisher of this book have used their #
# best efforts in preparing the book. These efforts include the #
# development, research, and testing of the theories and programs #
# to determine their effectiveness. The authors and publisher make #
# no warranty of any kind, expressed or implied, with regard to these #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or #
# consequential damages in connection with, or arising out of, the #
# furnishing, performance, or use of these programs. #
##########################################################################