Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f6d7fe4

Browse files
dizcology authored and Jon Wayne Parrott committed
Speech model selection (GoogleCloudPlatform#1361)
* add transcribe_model_selection
* add transcribe_model_selection_test
* flake
1 parent 7370677 commit f6d7fe4
Copy full SHA for f6d7fe4

File tree

Expand file tree / Collapse file tree

3 files changed

+137
-0
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+137
-0
lines changed
1.7 MB
Binary file not shown.
+102 lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates how to select the model
18+
used for speech recognition.
19+
20+
Example usage:
21+
python transcribe_model_selection.py \
22+
resources/Google_Gnome.wav --model video
23+
python transcribe_model_selection.py \
24+
gs://cloud-samples-tests/speech/Google_Gnome.wav --model video
25+
"""
26+
27+
import argparse
28+
29+
30+
# [START speech_transcribe_model_selection]
31+
def transcribe_model_selection(speech_file, model):
    """Recognize speech in a local audio file with the chosen model.

    The file is read in full and sent in a single synchronous
    ``recognize`` request configured for LINEAR16 audio at 16000 Hz
    with language code ``en-US``.

    Args:
        speech_file: Path of the audio file; opened in binary mode.
        model: Value forwarded as ``model`` in the recognition config.

    For every result in the response, prints a separator line, the
    result index and the transcript of its first alternative.
    """
    from google.cloud import speech_v1p1beta1 as speech

    speech_client = speech.SpeechClient()

    with open(speech_file, 'rb') as handle:
        audio_bytes = handle.read()

    recognition_audio = speech.types.RecognitionAudio(content=audio_bytes)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        model=model)

    reply = speech_client.recognize(recognition_config, recognition_audio)

    separator = '-' * 20
    for index, result in enumerate(reply.results):
        # Only the first alternative of each result is reported.
        best = result.alternatives[0]
        print(separator)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(best.transcript))
# [END speech_transcribe_model_selection]
56+
57+
58+
# [START speech_transcribe_model_selection_gcs]
59+
def transcribe_model_selection_gcs(gcs_uri, model):
    """Recognize speech in a Cloud Storage audio file with the chosen model.

    Starts a ``long_running_recognize`` operation configured for
    LINEAR16 audio at 16000 Hz with language code ``en-US``, then
    blocks on the operation's result with ``timeout=90``.

    Args:
        gcs_uri: URI of the audio, passed as ``uri`` to RecognitionAudio.
        model: Value forwarded as ``model`` in the recognition config.

    For every result in the response, prints a separator line, the
    result index and the transcript of its first alternative.
    """
    from google.cloud import speech_v1p1beta1 as speech

    speech_client = speech.SpeechClient()

    recognition_audio = speech.types.RecognitionAudio(uri=gcs_uri)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        model=model)

    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)

    print('Waiting for operation to complete...')
    reply = pending.result(timeout=90)

    separator = '-' * 20
    for index, result in enumerate(reply.results):
        # Only the first alternative of each result is reported.
        best = result.alternatives[0]
        print(separator)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(best.transcript))
# [END speech_transcribe_model_selection_gcs]
84+
85+
86+
if __name__ == '__main__':
    # The module docstring doubles as the CLI description; the raw
    # formatter keeps its example-usage layout intact in --help output.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument(
        'path',
        help='File or GCS path for audio file to be recognized')
    cli.add_argument(
        '--model',
        default='default',
        choices=['command_and_search', 'phone_call', 'video', 'default'],
        help='The speech recognition model to use')

    options = cli.parse_args()

    # Paths starting with gs:// go to the Cloud Storage sample; anything
    # else is handled as a local file path.
    if options.path.startswith('gs://'):
        run_sample = transcribe_model_selection_gcs
    else:
        run_sample = transcribe_model_selection
    run_sample(options.path, options.model)
+35 lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
17+
import transcribe_model_selection
18+
19+
# Path of the 'resources' directory located alongside this test module.
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
20+
21+
22+
def test_transcribe_model_selection_file(capsys):
    """The local-file sample prints the expected phrase with 'video'."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_model_selection.transcribe_model_selection(
        audio_path, 'video')

    printed, _ = capsys.readouterr()

    # Case-insensitive match anywhere in the captured stdout.
    expected = re.compile(
        r'the weather outside is sunny', re.DOTALL | re.I)
    assert expected.search(printed)
28+
29+
30+
def test_transcribe_model_selection_gcs(capsys):
    """The gs:// sample prints the expected phrase with 'video'."""
    audio_uri = 'gs://cloud-samples-tests/speech/Google_Gnome.wav'
    transcribe_model_selection.transcribe_model_selection_gcs(
        audio_uri, 'video')

    printed, _ = capsys.readouterr()

    # Case-insensitive match anywhere in the captured stdout.
    expected = re.compile(
        r'the weather outside is sunny', re.DOTALL | re.I)
    assert expected.search(printed)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.