Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5272268

Browse files
feat: video speech transcription (GoogleCloudPlatform#1849)
1 parent 5f1a321 commit 5272268
Copy full SHA for 5272268

File tree

Expand file treeCollapse file tree

2 files changed

+62
-0
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+62
-0
lines changed

‎video/cloud-client/analyze/analyze.py

Copy file name to clipboardExpand all lines: video/cloud-client/analyze/analyze.py
+54Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,55 @@ def analyze_shots(path):
229229
# [END video_analyze_shots]
230230

231231

232+
def speech_transcription(path):
    # [START video_speech_transcription]
    """Transcribe speech from a video stored on GCS."""
    # NOTE: the docstring above doubles as the CLI help text for the
    # 'transcribe' sub-command, so keep it short and user-facing.
    from google.cloud import videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()
    requested_features = [
        videointelligence.enums.Feature.SPEECH_TRANSCRIPTION,
    ]

    # Transcribe US English and let the service insert punctuation.
    transcription_config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US',
        enable_automatic_punctuation=True)
    context = videointelligence.types.VideoContext(
        speech_transcription_config=transcription_config)

    # annotate_video returns a long-running operation handle.
    pending = client.annotate_video(
        path,
        features=requested_features,
        video_context=context)

    print('\nProcessing video for speech transcription.')

    # Block until the operation completes, waiting at most 600 seconds.
    response = pending.result(timeout=600)

    # A single video was submitted, so only the first annotation result
    # is of interest.
    video_annotations = response.annotation_results[0]
    for transcription in video_annotations.speech_transcriptions:

        # SpeechTranscriptionConfig.max_alternatives caps how many
        # alternatives come back per transcription. Every alternative is
        # one candidate transcript carrying its own confidence score.
        for candidate in transcription.alternatives:
            print('Alternative level information:')

            print('Transcript: {}'.format(candidate.transcript))
            print('Confidence: {}\n'.format(candidate.confidence))

            print('Word level information:')
            for entry in candidate.words:
                spoken_word = entry.word
                begins = entry.start_time
                ends = entry.end_time
                # Offsets carry separate seconds/nanos fields; fold them
                # into fractional seconds for display.
                begin_seconds = begins.seconds + begins.nanos * 1e-9
                end_seconds = ends.seconds + ends.nanos * 1e-9
                print('\t{}s - {}s: {}'.format(
                    begin_seconds, end_seconds, spoken_word))
    # [END video_speech_transcription]
279+
280+
232281
if __name__ == '__main__':
233282
parser = argparse.ArgumentParser(
234283
description=__doc__,
@@ -246,6 +295,9 @@ def analyze_shots(path):
246295
analyze_shots_parser = subparsers.add_parser(
247296
'shots', help=analyze_shots.__doc__)
248297
analyze_shots_parser.add_argument('path')
298+
transcribe_speech_parser = subparsers.add_parser(
299+
'transcribe', help=speech_transcription.__doc__)
300+
transcribe_speech_parser.add_argument('path')
249301

250302
args = parser.parse_args()
251303

@@ -257,3 +309,5 @@ def analyze_shots(path):
257309
analyze_shots(args.path)
258310
if args.command == 'explicit_content':
259311
analyze_explicit_content(args.path)
312+
if args.command == 'transcribe':
313+
speech_transcription(args.path)

‎video/cloud-client/analyze/analyze_test.py

Copy file name to clipboardExpand all lines: video/cloud-client/analyze/analyze_test.py
+8Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,11 @@ def test_analyze_explicit_content(capsys):
3838
analyze.analyze_explicit_content('gs://demomaker/cat.mp4')
3939
out, _ = capsys.readouterr()
4040
assert 'pornography' in out
41+
42+
43+
@pytest.mark.slow
def test_speech_transcription(capsys):
    """Transcribe the sample video and check the printed transcript."""
    video_uri = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
    analyze.speech_transcription(video_uri)

    # Only stdout matters here; stderr is discarded.
    stdout, _stderr = capsys.readouterr()
    assert 'cultural' in stdout

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.