Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit c310941

Browse files
authored
Diarization Output Modified (GoogleCloudPlatform#1586)
* Printing the last paragraph only.
* Python3 print
* Removing sample rate setting
* Adding the missing output parameter in the example
* Changes based on the comments
* Removed filenames as input parameters
* Removed unused args
* Updated README file
* Updated the inline comment
* Modified code to make it more readable
* Simplified the response object processing.
* Fixing the long line issue.
1 parent 809b232 commit c310941
Copy full SHA for c310941

File tree

Expand file tree / Collapse file tree

4 files changed

+61
-80
lines changed
Filter options
Expand file tree / Collapse file tree

4 files changed

+61
-80
lines changed

‎speech/cloud-client/README.rst

Copy file name to clipboard | Expand all lines: speech/cloud-client/README.rst
+8 -11 | Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -221,25 +221,22 @@ To run this sample:
221221
222222
$ python beta_snippets.py
223223
224-
usage: beta_snippets.py [-h] command path first second
224+
usage: beta_snippets.py [-h] command
225225
226226
Google Cloud Speech API sample that demonstrates enhanced models
227227
and recognition metadata.
228228
229229
Example usage:
230-
python beta_snippets.py enhanced-model resources/commercial_mono.wav
231-
python beta_snippets.py metadata resources/commercial_mono.wav
232-
python beta_snippets.py punctuation resources/commercial_mono.wav
233-
python beta_snippets.py diarization resources/commercial_mono.wav
234-
python beta_snippets.py multi-channel resources/commercial_mono.wav
235-
python beta_snippets.py multi-language resources/multi.wav en-US es
236-
python beta_snippets.py word-level-conf resources/commercial_mono.wav
230+
python beta_snippets.py enhanced-model
231+
python beta_snippets.py metadata
232+
python beta_snippets.py punctuation
233+
python beta_snippets.py diarization
234+
python beta_snippets.py multi-channel
235+
python beta_snippets.py multi-language
236+
python beta_snippets.py word-level-conf
237237
238238
positional arguments:
239239
command
240-
path File for audio file to be recognized
241-
first First language in audio file to be recognized
242-
second Second language in audio file to be recognized
243240
244241
optional arguments:
245242
-h, --help show this help message and exit

‎speech/cloud-client/beta_snippets.py

Copy file name to clipboard | Expand all lines: speech/cloud-client/beta_snippets.py
+44 -53 | Lines changed: 44 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -18,27 +18,26 @@
1818
and recognition metadata.
1919
2020
Example usage:
21-
python beta_snippets.py enhanced-model resources/commercial_mono.wav
22-
python beta_snippets.py metadata resources/commercial_mono.wav
23-
python beta_snippets.py punctuation resources/commercial_mono.wav
24-
python beta_snippets.py diarization resources/commercial_mono.wav
25-
python beta_snippets.py multi-channel resources/commercial_mono.wav
26-
python beta_snippets.py multi-language resources/multi.wav en-US es
27-
python beta_snippets.py word-level-conf resources/commercial_mono.wav
21+
python beta_snippets.py enhanced-model
22+
python beta_snippets.py metadata
23+
python beta_snippets.py punctuation
24+
python beta_snippets.py diarization
25+
python beta_snippets.py multi-channel
26+
python beta_snippets.py multi-language
27+
python beta_snippets.py word-level-conf
2828
"""
2929

3030
import argparse
3131
import io
3232

3333

34-
def transcribe_file_with_enhanced_model(speech_file):
34+
def transcribe_file_with_enhanced_model():
3535
"""Transcribe the given audio file using an enhanced model."""
3636
# [START speech_transcribe_file_with_enhanced_model]
3737
from google.cloud import speech_v1p1beta1 as speech
3838
client = speech.SpeechClient()
3939

40-
# TODO(developer): Uncomment and set to a path to your audio file.
41-
# speech_file = 'path/to/file.wav'
40+
speech_file = 'resources/commercial_mono.wav'
4241

4342
with io.open(speech_file, 'rb') as audio_file:
4443
content = audio_file.read()
@@ -64,14 +63,13 @@ def transcribe_file_with_enhanced_model(speech_file):
6463
# [END speech_transcribe_file_with_enhanced_model]
6564

6665

67-
def transcribe_file_with_metadata(speech_file):
66+
def transcribe_file_with_metadata():
6867
"""Send a request that includes recognition metadata."""
6968
# [START speech_transcribe_file_with_metadata]
7069
from google.cloud import speech_v1p1beta1 as speech
7170
client = speech.SpeechClient()
7271

73-
# TODO(developer): Uncomment and set to a path to your audio file.
74-
# speech_file = 'path/to/file.wav'
72+
speech_file = 'resources/commercial_mono.wav'
7573

7674
with io.open(speech_file, 'rb') as audio_file:
7775
content = audio_file.read()
@@ -110,14 +108,13 @@ def transcribe_file_with_metadata(speech_file):
110108
# [END speech_transcribe_file_with_metadata]
111109

112110

113-
def transcribe_file_with_auto_punctuation(speech_file):
111+
def transcribe_file_with_auto_punctuation():
114112
"""Transcribe the given audio file with auto punctuation enabled."""
115113
# [START speech_transcribe_file_with_auto_punctuation]
116114
from google.cloud import speech_v1p1beta1 as speech
117115
client = speech.SpeechClient()
118116

119-
# TODO(developer): Uncomment and set to a path to your audio file.
120-
# speech_file = 'path/to/file.wav'
117+
speech_file = 'resources/commercial_mono.wav'
121118

122119
with io.open(speech_file, 'rb') as audio_file:
123120
content = audio_file.read()
@@ -140,14 +137,13 @@ def transcribe_file_with_auto_punctuation(speech_file):
140137
# [END speech_transcribe_file_with_auto_punctuation]
141138

142139

143-
def transcribe_file_with_diarization(speech_file):
140+
def transcribe_file_with_diarization():
144141
"""Transcribe the given audio file synchronously with diarization."""
145142
# [START speech_transcribe_diarization]
146143
from google.cloud import speech_v1p1beta1 as speech
147144
client = speech.SpeechClient()
148145

149-
# TODO(developer): Uncomment and set to a path to your audio file.
150-
# speech_file = 'path/to/file.wav'
146+
speech_file = 'resources/commercial_mono.wav'
151147

152148
with open(speech_file, 'rb') as audio_file:
153149
content = audio_file.read()
@@ -156,33 +152,37 @@ def transcribe_file_with_diarization(speech_file):
156152

157153
config = speech.types.RecognitionConfig(
158154
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
159-
sample_rate_hertz=16000,
155+
sample_rate_hertz=8000,
160156
language_code='en-US',
161157
enable_speaker_diarization=True,
162158
diarization_speaker_count=2)
163159

164160
print('Waiting for operation to complete...')
165161
response = client.recognize(config, audio)
166162

167-
for i, result in enumerate(response.results):
168-
alternative = result.alternatives[0]
169-
print('-' * 20)
170-
print('First alternative of result {}: {}'
171-
.format(i, alternative.transcript))
172-
print('Speaker Tag for the first word: {}'
173-
.format(alternative.words[0].speaker_tag))
163+
# The transcript within each result is separate and sequential per result.
164+
# However, the words list within an alternative includes all the words
165+
# from all the results thus far. Thus, to get all the words with speaker
166+
# tags, you only have to take the words list from the last result:
167+
result = response.results[-1]
168+
169+
words_info = result.alternatives[0].words
170+
171+
# Printing out the output:
172+
for word_info in words_info:
173+
print("word: '{}', speaker_tag: {}".format(word_info.word,
174+
word_info.speaker_tag))
174175
# [END speech_transcribe_diarization]
175176

176177

177-
def transcribe_file_with_multichannel(speech_file):
178+
def transcribe_file_with_multichannel():
178179
"""Transcribe the given audio file synchronously with
179180
multi channel."""
180181
# [START speech_transcribe_multichannel]
181182
from google.cloud import speech_v1p1beta1 as speech
182183
client = speech.SpeechClient()
183184

184-
# TODO(developer): Uncomment and set to a path to your audio file.
185-
# speech_file = 'path/to/file.wav'
185+
speech_file = 'resources/Google_Gnome.wav'
186186

187187
with open(speech_file, 'rb') as audio_file:
188188
content = audio_file.read()
@@ -207,17 +207,16 @@ def transcribe_file_with_multichannel(speech_file):
207207
# [END speech_transcribe_multichannel]
208208

209209

210-
def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
210+
def transcribe_file_with_multilanguage():
211211
"""Transcribe the given audio file synchronously with
212212
multi language."""
213213
# [START speech_transcribe_multilanguage]
214214
from google.cloud import speech_v1p1beta1 as speech
215215
client = speech.SpeechClient()
216216

217-
# TODO(developer): Uncomment and set to a path to your audio file.
218-
# speech_file = 'path/to/file.wav'
219-
# first_lang = first language code, e,g, 'en-US'
220-
# second_lang = first language code, e,g, 'es'
217+
speech_file = 'resources/multi.wav'
218+
first_lang = 'en-US'
219+
second_lang = 'es'
221220

222221
with open(speech_file, 'rb') as audio_file:
223222
content = audio_file.read()
@@ -226,6 +225,7 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
226225

227226
config = speech.types.RecognitionConfig(
228227
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
228+
sample_rate_hertz=44100,
229229
audio_channel_count=2,
230230
language_code=first_lang,
231231
alternative_language_codes=[second_lang])
@@ -241,15 +241,14 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
241241
# [END speech_transcribe_multilanguage]
242242

243243

244-
def transcribe_file_with_word_level_confidence(speech_file):
244+
def transcribe_file_with_word_level_confidence():
245245
"""Transcribe the given audio file synchronously with
246246
word level confidence."""
247247
# [START speech_transcribe_word_level_confidence]
248248
from google.cloud import speech_v1p1beta1 as speech
249249
client = speech.SpeechClient()
250250

251-
# TODO(developer): Uncomment and set to a path to your audio file.
252-
# speech_file = 'path/to/file.wav'
251+
speech_file = 'resources/Google_Gnome.wav'
253252

254253
with open(speech_file, 'rb') as audio_file:
255254
content = audio_file.read()
@@ -279,28 +278,20 @@ def transcribe_file_with_word_level_confidence(speech_file):
279278
description=__doc__,
280279
formatter_class=argparse.RawDescriptionHelpFormatter)
281280
parser.add_argument('command')
282-
parser.add_argument(
283-
'path', help='File for audio file to be recognized')
284-
parser.add_argument(
285-
'first', help='First language in audio file to be recognized',
286-
nargs='?')
287-
parser.add_argument(
288-
'second', help='Second language in audio file to be recognized',
289-
nargs='?')
290281

291282
args = parser.parse_args()
292283

293284
if args.command == 'enhanced-model':
294-
transcribe_file_with_enhanced_model(args.path)
285+
transcribe_file_with_enhanced_model()
295286
elif args.command == 'metadata':
296-
transcribe_file_with_metadata(args.path)
287+
transcribe_file_with_metadata()
297288
elif args.command == 'punctuation':
298-
transcribe_file_with_auto_punctuation(args.path)
289+
transcribe_file_with_auto_punctuation()
299290
elif args.command == 'diarization':
300-
transcribe_file_with_diarization(args.path)
291+
transcribe_file_with_diarization()
301292
elif args.command == 'multi-channel':
302-
transcribe_file_with_multichannel(args.path)
293+
transcribe_file_with_multichannel()
303294
elif args.command == 'multi-language':
304-
transcribe_file_with_multilanguage(args.path, args.first, args.second)
295+
transcribe_file_with_multilanguage()
305296
elif args.command == 'word-level-conf':
306-
transcribe_file_with_word_level_confidence(args.path)
297+
transcribe_file_with_word_level_confidence()

‎speech/cloud-client/beta_snippets_test.py

Copy file name to clipboard | Expand all lines: speech/cloud-client/beta_snippets_test.py
+8 -15 | Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -26,56 +26,49 @@
2626

2727

2828
def test_transcribe_file_with_enhanced_model(capsys):
29-
transcribe_file_with_enhanced_model(
30-
os.path.join(RESOURCES, 'commercial_mono.wav'))
29+
transcribe_file_with_enhanced_model()
3130
out, _ = capsys.readouterr()
3231

3332
assert 'Chrome' in out
3433

3534

3635
def test_transcribe_file_with_metadata(capsys):
37-
transcribe_file_with_metadata(
38-
os.path.join(RESOURCES, 'commercial_mono.wav'))
36+
transcribe_file_with_metadata()
3937
out, _ = capsys.readouterr()
4038

4139
assert 'Chrome' in out
4240

4341

4442
def test_transcribe_file_with_auto_punctuation(capsys):
45-
transcribe_file_with_auto_punctuation(
46-
os.path.join(RESOURCES, 'commercial_mono.wav'))
43+
transcribe_file_with_auto_punctuation()
4744
out, _ = capsys.readouterr()
4845

4946
assert 'Okay. Sure.' in out
5047

5148

5249
def test_transcribe_diarization(capsys):
53-
transcribe_file_with_diarization(
54-
os.path.join(RESOURCES, 'Google_Gnome.wav'))
50+
transcribe_file_with_diarization()
5551
out, err = capsys.readouterr()
5652

57-
assert 'OK Google stream stranger things from Netflix to my TV' in out
53+
assert "word: 'here', speaker_tag: 1" in out
5854

5955

6056
def test_transcribe_multichannel_file(capsys):
61-
transcribe_file_with_multichannel(
62-
os.path.join(RESOURCES, 'Google_Gnome.wav'))
57+
transcribe_file_with_multichannel()
6358
out, err = capsys.readouterr()
6459

6560
assert 'OK Google stream stranger things from Netflix to my TV' in out
6661

6762

6863
def test_transcribe_multilanguage_file(capsys):
69-
transcribe_file_with_multilanguage(
70-
os.path.join(RESOURCES, 'multi.wav'), 'en-US', 'es')
64+
transcribe_file_with_multilanguage()
7165
out, err = capsys.readouterr()
7266

7367
assert 'how are you doing estoy bien e tu' in out
7468

7569

7670
def test_transcribe_word_level_confidence(capsys):
77-
transcribe_file_with_word_level_confidence(
78-
os.path.join(RESOURCES, 'Google_Gnome.wav'))
71+
transcribe_file_with_word_level_confidence()
7972
out, err = capsys.readouterr()
8073

8174
assert 'OK Google stream stranger things from Netflix to my TV' in out

‎texttospeech/cloud-client/audio_profile.py

Copy file name to clipboard | Expand all lines: texttospeech/cloud-client/audio_profile.py
+1 -1 | Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
1919
Example usage:
2020
python audio_profile.py --text "hello" --effects_profile_id
21-
"telephony-class-application"
21+
"telephony-class-application" --output "output.mp3"
2222
"""
2323

2424
import argparse

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.