Commit 1f1dee7

Video GA text detection and object tracking (GoogleCloudPlatform#2024)

* add video text and object samples
* update sample data location
* add tests of the gcs samples
* update client library version in requirements.txt
* fix
* remove location_id

1 parent 6ea559f · commit 1f1dee7

File tree: 3 files changed (+243, -9)

video/cloud-client/analyze/analyze.py
video/cloud-client/analyze/analyze_test.py
video/cloud-client/analyze/requirements.txt

video/cloud-client/analyze/analyze.py

222 additions, 4 deletions
@@ -19,11 +19,16 @@
 
 Usage Examples:
 
-    python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
+    python analyze.py labels gs://cloud-samples-data/video/chicago.mp4
     python analyze.py labels_file resources/cat.mp4
-    python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
-
+    python analyze.py shots gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py explicit_content \
+        gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py text_gcs \
+        gs://cloud-samples-data/video/googlework_short.mp4
+    python analyze.py text_file resources/googlework_short.mp4
+    python analyze.py objects_gcs gs://cloud-samples-data/video/cat.mp4
+    python analyze.py objects_file resources/cat.mp4
 """
 
 import argparse
@@ -278,27 +283,232 @@ def speech_transcription(path):
     # [END video_speech_transcription_gcs]
 
 
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs]
+
+
+def video_detect_text(path):
+    # [START video_detect_text]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text]
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs]
+    """Object tracking in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    for object_annotation in object_annotations:
+        print('Entity description: {}'.format(
+            object_annotation.entity.description))
+        if object_annotation.entity.entity_id:
+            print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+        print('Segment: {}s to {}s'.format(
+            object_annotation.segment.start_time_offset.seconds +
+            object_annotation.segment.start_time_offset.nanos / 1e9,
+            object_annotation.segment.end_time_offset.seconds +
+            object_annotation.segment.end_time_offset.nanos / 1e9))
+
+        print('Confidence: {}'.format(object_annotation.confidence))
+
+        # Here we print only the bounding box of the first frame in the segment
+        frame = object_annotation.frames[0]
+        box = frame.normalized_bounding_box
+        print('Time offset of the first frame: {}s'.format(
+            frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+        print('Bounding box position:')
+        print('\tleft  : {}'.format(box.left))
+        print('\ttop   : {}'.format(box.top))
+        print('\tright : {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
+        print('\n')
+    # [END video_object_tracking_gcs]
+
+
+def track_objects(path):
+    # [START video_object_tracking]
+    """Object tracking in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
+
     analyze_labels_parser = subparsers.add_parser(
         'labels', help=analyze_labels.__doc__)
     analyze_labels_parser.add_argument('path')
+
     analyze_labels_file_parser = subparsers.add_parser(
         'labels_file', help=analyze_labels_file.__doc__)
     analyze_labels_file_parser.add_argument('path')
+
     analyze_explicit_content_parser = subparsers.add_parser(
         'explicit_content', help=analyze_explicit_content.__doc__)
     analyze_explicit_content_parser.add_argument('path')
+
     analyze_shots_parser = subparsers.add_parser(
         'shots', help=analyze_shots.__doc__)
     analyze_shots_parser.add_argument('path')
+
     transcribe_speech_parser = subparsers.add_parser(
         'transcribe', help=speech_transcription.__doc__)
     transcribe_speech_parser.add_argument('path')
 
+    detect_text_parser = subparsers.add_parser(
+        'text_gcs', help=video_detect_text_gcs.__doc__)
+    detect_text_parser.add_argument('path')
+
+    detect_text_file_parser = subparsers.add_parser(
+        'text_file', help=video_detect_text.__doc__)
+    detect_text_file_parser.add_argument('path')
+
+    track_objects_parser = subparsers.add_parser(
+        'objects_gcs', help=track_objects_gcs.__doc__)
+    track_objects_parser.add_argument('path')
+
+    track_objects_file_parser = subparsers.add_parser(
+        'objects_file', help=track_objects.__doc__)
+    track_objects_file_parser.add_argument('path')
+
     args = parser.parse_args()
 
     if args.command == 'labels':
@@ -311,3 +521,11 @@ def speech_transcription(path):
         analyze_explicit_content(args.path)
     if args.command == 'transcribe':
         speech_transcription(args.path)
+    if args.command == 'text_gcs':
+        video_detect_text_gcs(args.path)
+    if args.command == 'text_file':
+        video_detect_text(args.path)
+    if args.command == 'objects_gcs':
+        track_objects_gcs(args.path)
+    if args.command == 'objects_file':
+        track_objects(args.path)
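
For readers who want to exercise the new samples without going through the argparse CLI, the functions can also be imported and called directly. A minimal sketch, assuming google-cloud-videointelligence==1.7.0 is installed, Application Default Credentials are configured, the script runs from the video/cloud-client/analyze directory, and the public cloud-samples-data bucket remains reachable:

    import analyze

    # Each call blocks on the long-running annotate_video operation
    # (up to the 300-second timeout the samples set internally).
    analyze.video_detect_text_gcs(
        'gs://cloud-samples-data/video/googlework_short.mp4')
    analyze.track_objects_gcs('gs://cloud-samples-data/video/cat.mp4')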

video/cloud-client/analyze/analyze_test.py

20 additions, 4 deletions
@@ -21,28 +21,44 @@
 
 @pytest.mark.slow
 def test_analyze_shots(capsys):
-    analyze.analyze_shots('gs://demomaker/gbikes_dinosaur.mp4')
+    analyze.analyze_shots('gs://cloud-samples-data/video/gbikes_dinosaur.mp4')
     out, _ = capsys.readouterr()
     assert 'Shot 1:' in out
 
 
 @pytest.mark.slow
 def test_analyze_labels(capsys):
-    analyze.analyze_labels('gs://demomaker/cat.mp4')
+    analyze.analyze_labels('gs://cloud-samples-data/video/cat.mp4')
     out, _ = capsys.readouterr()
     assert 'label description: cat' in out
 
 
 @pytest.mark.slow
 def test_analyze_explicit_content(capsys):
-    analyze.analyze_explicit_content('gs://demomaker/cat.mp4')
+    analyze.analyze_explicit_content('gs://cloud-samples-data/video/cat.mp4')
     out, _ = capsys.readouterr()
     assert 'pornography' in out
 
 
 @pytest.mark.slow
 def test_speech_transcription(capsys):
     analyze.speech_transcription(
-        'gs://python-docs-samples-tests/video/googlework_short.mp4')
+        'gs://cloud-samples-data/video/googlework_short.mp4')
     out, _ = capsys.readouterr()
     assert 'cultural' in out
+
+
+@pytest.mark.slow
+def test_detect_text_gcs(capsys):
+    analyze.video_detect_text_gcs(
+        'gs://cloud-samples-data/video/googlework_short.mp4')
+    out, _ = capsys.readouterr()
+    assert 'GOOGLE' in out
+
+
+@pytest.mark.slow
+def test_track_objects_gcs(capsys):
+    analyze.track_objects_gcs(
+        'gs://cloud-samples-data/video/cat.mp4')
+    out, _ = capsys.readouterr()
+    assert 'cat' in out
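
Per the commit message, only the GCS variants of the new samples are tested here. A minimal sketch of running just the two new tests from Python, assuming pytest and the sample's dependencies are installed and the custom slow marker is registered in the project's pytest configuration:

    import pytest

    # Equivalent to the command line:
    #   pytest -m slow -k 'detect_text_gcs or track_objects_gcs' analyze_test.py
    pytest.main(['-m', 'slow',
                 '-k', 'detect_text_gcs or track_objects_gcs',
                 'analyze_test.py'])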
video/cloud-client/analyze/requirements.txt
1 addition, 1 deletion

@@ -1 +1 @@
-google-cloud-videointelligence==1.6.1
+google-cloud-videointelligence==1.7.0
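
The pin moves from 1.6.1 to 1.7.0, presumably because the new samples depend on the GA text-detection and object-tracking surface of the client library. A quick sanity check that the installed client matches the pin (pkg_resources ships with setuptools); this snippet is illustrative and not part of the commit:

    import pkg_resources

    # Confirm the installed library matches the version in requirements.txt.
    installed = pkg_resources.get_distribution(
        'google-cloud-videointelligence').version
    assert installed == '1.7.0', 'expected 1.7.0, got {}'.format(installed)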
