Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b92890a

Browse files
author
Jon Wayne Parrott
authored
Add word time offset samples (GoogleCloudPlatform#1050)
1 parent a5b0301 commit b92890a
Copy full SHA for b92890a

File tree

Expand file treeCollapse file tree

6 files changed

+189
-40
lines changed
Filter options
Expand file treeCollapse file tree

6 files changed

+189
-40
lines changed

‎speech/cloud-client/README.rst

Copy file name to clipboardExpand all lines: speech/cloud-client/README.rst
+26Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,32 @@ To run this sample:
140140
-h, --help show this help message and exit
141141
142142
143+
Transcribe with word time offsets
144+
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
145+
146+
147+
148+
To run this sample:
149+
150+
.. code-block:: bash
151+
152+
$ python transcribe_word_time_offsets.py
153+
154+
usage: transcribe_word_time_offsets.py [-h] path
155+
156+
Google Cloud Speech API sample that demonstrates word time offsets.
157+
158+
Example usage:
159+
python transcribe_word_time_offsets.py resources/audio.raw
160+
python transcribe_word_time_offsets.py gs://cloud-samples-tests/speech/vr.flac
161+
162+
positional arguments:
163+
path File or GCS path for audio file to be recognized
164+
165+
optional arguments:
166+
-h, --help show this help message and exit
167+
168+
143169
Transcribe Streaming
144170
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
145171

‎speech/cloud-client/README.rst.in

Copy file name to clipboardExpand all lines: speech/cloud-client/README.rst.in
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ samples:
2828
- name: Transcribe async
2929
file: transcribe_async.py
3030
show_help: true
31+
- name: Transcribe with word time offsets
32+
file: transcribe_word_time_offsets.py
33+
show_help: true
3134
- name: Transcribe Streaming
3235
file: transcribe_streaming.py
3336
show_help: true

‎speech/cloud-client/transcribe_async.py

Copy file name to clipboardExpand all lines: speech/cloud-client/transcribe_async.py
+6-29Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
import argparse
2626
import io
27-
import time
2827

2928

3029
# [START def_transcribe_file]
@@ -49,17 +48,10 @@ def transcribe_file(speech_file):
4948
operation = client.long_running_recognize(config, audio)
5049
# [END migration_async_request]
5150

52-
# Sleep and poll operation.done()
53-
retry_count = 100
54-
while retry_count > 0 and not operation.done():
55-
retry_count -= 1
56-
time.sleep(2)
51+
print('Waiting for operation to complete...')
52+
result = operation.result(timeout=90)
5753

58-
if not operation.done():
59-
print('Operation not complete and retry limit reached.')
60-
return
61-
62-
alternatives = operation.result().results[0].alternatives
54+
alternatives = result.results[0].alternatives
6355
for alternative in alternatives:
6456
print('Transcript: {}'.format(alternative.transcript))
6557
print('Confidence: {}'.format(alternative.confidence))
@@ -84,28 +76,13 @@ def transcribe_gcs(gcs_uri):
8476

8577
operation = client.long_running_recognize(config, audio)
8678

87-
retry_count = 100
88-
while retry_count > 0 and not operation.done():
89-
retry_count -= 1
90-
time.sleep(2)
91-
92-
if not operation.done():
93-
print('Operation not complete and retry limit reached.')
94-
return
79+
print('Waiting for operation to complete...')
80+
result = operation.result(timeout=90)
9581

96-
alternatives = operation.result().results[0].alternatives
82+
alternatives = result.results[0].alternatives
9783
for alternative in alternatives:
9884
print('Transcript: {}'.format(alternative.transcript))
9985
print('Confidence: {}'.format(alternative.confidence))
100-
101-
for word_info in alternative.words:
102-
word = word_info.word
103-
start_time = word_info.start_time
104-
end_time = word_info.end_time
105-
print('Word: {}, start_time: {}, end_time: {}'.format(
106-
word,
107-
start_time.seconds + start_time.nanos * 1e-9,
108-
end_time.seconds + end_time.nanos * 1e-9))
10986
# [END def_transcribe_gcs]
11087

11188

‎speech/cloud-client/transcribe_async_test.py

Copy file name to clipboardExpand all lines: speech/cloud-client/transcribe_async_test.py
-11Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,3 @@ def test_transcribe_gcs(capsys):
3333
out, err = capsys.readouterr()
3434

3535
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
36-
37-
38-
def test_transcribe_gcs_word_time_offsets(capsys):
39-
transcribe_async.transcribe_gcs(
40-
'gs://python-docs-samples-tests/speech/audio.flac')
41-
out, err = capsys.readouterr()
42-
43-
match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
44-
time = float(match.group(1))
45-
46-
assert time > 0
speech/cloud-client/transcribe_word_time_offsets.py
+111 Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates word time offsets.
18+
19+
Example usage:
20+
python transcribe_word_time_offsets.py resources/audio.raw
21+
python transcribe_word_time_offsets.py \
22+
gs://cloud-samples-tests/speech/vr.flac
23+
"""
24+
25+
import argparse
26+
import io
27+
28+
29+
def transcribe_file_with_word_time_offsets(speech_file):
30+
"""Transcribe the given audio file synchronously and output the word time
31+
offsets."""
32+
from google.cloud import speech
33+
from google.cloud.speech import enums
34+
from google.cloud.speech import types
35+
client = speech.SpeechClient()
36+
37+
with io.open(speech_file, 'rb') as audio_file:
38+
content = audio_file.read()
39+
40+
audio = types.RecognitionAudio(content=content)
41+
config = types.RecognitionConfig(
42+
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
43+
sample_rate_hertz=16000,
44+
language_code='en-US',
45+
enable_word_time_offsets=True)
46+
47+
response = client.recognize(config, audio)
48+
49+
alternatives = response.results[0].alternatives
50+
51+
for alternative in alternatives:
52+
print('Transcript: {}'.format(alternative.transcript))
53+
54+
for word_info in alternative.words:
55+
word = word_info.word
56+
start_time = word_info.start_time
57+
end_time = word_info.end_time
58+
print('Word: {}, start_time: {}, end_time: {}'.format(
59+
word,
60+
start_time.seconds + start_time.nanos * 1e-9,
61+
end_time.seconds + end_time.nanos * 1e-9))
62+
63+
64+
# [START def_transcribe_gcs]
65+
def transcribe_gcs_with_word_time_offsets(gcs_uri):
66+
"""Transcribe the given audio file asynchronously and output the word time
67+
offsets."""
68+
from google.cloud import speech
69+
from google.cloud.speech import enums
70+
from google.cloud.speech import types
71+
client = speech.SpeechClient()
72+
73+
audio = types.RecognitionAudio(uri=gcs_uri)
74+
config = types.RecognitionConfig(
75+
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
76+
sample_rate_hertz=16000,
77+
language_code='en-US',
78+
enable_word_time_offsets=True)
79+
80+
operation = client.long_running_recognize(config, audio)
81+
82+
print('Waiting for operation to complete...')
83+
result = operation.result(timeout=90)
84+
85+
alternatives = result.results[0].alternatives
86+
for alternative in alternatives:
87+
print('Transcript: {}'.format(alternative.transcript))
88+
print('Confidence: {}'.format(alternative.confidence))
89+
90+
for word_info in alternative.words:
91+
word = word_info.word
92+
start_time = word_info.start_time
93+
end_time = word_info.end_time
94+
print('Word: {}, start_time: {}, end_time: {}'.format(
95+
word,
96+
start_time.seconds + start_time.nanos * 1e-9,
97+
end_time.seconds + end_time.nanos * 1e-9))
98+
# [END def_transcribe_gcs]
99+
100+
101+
if __name__ == '__main__':
102+
parser = argparse.ArgumentParser(
103+
description=__doc__,
104+
formatter_class=argparse.RawDescriptionHelpFormatter)
105+
parser.add_argument(
106+
'path', help='File or GCS path for audio file to be recognized')
107+
args = parser.parse_args()
108+
if args.path.startswith('gs://'):
109+
transcribe_gcs_with_word_time_offsets(args.path)
110+
else:
111+
transcribe_file_with_word_time_offsets(args.path)
speech/cloud-client/transcribe_word_time_offsets_test.py
+43 Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
17+
import transcribe_word_time_offsets
18+
19+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
20+
21+
22+
def test_transcribe_file_with_word_time_offsets(capsys):
23+
transcribe_word_time_offsets.transcribe_file_with_word_time_offsets(
24+
os.path.join(RESOURCES, 'audio.raw'))
25+
out, _ = capsys.readouterr()
26+
27+
print(out)
28+
match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
29+
time = float(match.group(1))
30+
31+
assert time > 0
32+
33+
34+
def test_transcribe_gcs_with_word_time_offsets(capsys):
35+
transcribe_word_time_offsets.transcribe_gcs_with_word_time_offsets(
36+
'gs://python-docs-samples-tests/speech/audio.flac')
37+
out, _ = capsys.readouterr()
38+
39+
print(out)
40+
match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
41+
time = float(match.group(1))
42+
43+
assert time > 0

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.