Add beta snippets for object tracking / text detection #1773

Merged: 8 commits (Oct 24, 2018)
40 changes: 40 additions & 0 deletions 40 video/cloud-client/analyze/README.rst
@@ -100,6 +100,46 @@ To run this sample:



beta samples
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/beta_snippets.py,video/cloud-client/analyze/README.rst




To run this sample:

.. code-block:: bash

$ python beta_snippets.py

usage: beta_snippets.py [-h]
{transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
...

This application demonstrates speech transcription, text detection, and
object tracking using the Google Cloud Video Intelligence API.

Usage Examples:
python beta_snippets.py transcription gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py video-text-gcs gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py track-objects resources/cat.mp4

positional arguments:
{transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
transcription Transcribe speech from a video stored on GCS.
video-text-gcs Detect text in a video stored on GCS.
video-text Detect text in a local video.
track-objects-gcs Object tracking in a video stored on GCS.
track-objects Object tracking in a local video.

optional arguments:
-h, --help show this help message and exit





The client library
3 changes: 3 additions & 0 deletions 3 video/cloud-client/analyze/README.rst.in
@@ -16,6 +16,9 @@ samples:
- name: analyze
file: analyze.py
show_help: True
- name: beta samples
file: beta_snippets.py
show_help: True

cloud_client_library: true

235 changes: 229 additions & 6 deletions 235 video/cloud-client/analyze/beta_snippets.py
@@ -18,18 +18,22 @@
Google Cloud Video Intelligence API.

Usage Examples:
python beta_snippets.py transcription \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py video-text-gcs \
gs://python-docs-samples-tests/video/googlework_short.mp4
python beta_snippets.py track-objects resources/cat.mp4
"""

import argparse
import io


def speech_transcription(input_uri):
# [START video_speech_transcription_gcs_beta]
"""Transcribe speech from a video stored on GCS."""
from google.cloud import videointelligence_v1p1beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()

features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9,
word))
# [END video_speech_transcription_gcs_beta]
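
The hunk above collapses the middle of speech_transcription (the request setup
and the transcript printing). For context, a minimal sketch of the request
setup using the same v1p1beta1 types; treat the config values and the timeout
as illustrative rather than the PR's exact code:

    from google.cloud import videointelligence_v1p1beta1 as videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
    # Illustrative config; the collapsed hunk holds the PR's actual values.
    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US')
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)
    operation = video_client.annotate_video(
        'gs://python-docs-samples-tests/video/googlework_short.mp4',
        features=features, video_context=video_context)
    result = operation.result(timeout=180)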


def video_detect_text_gcs(input_uri):
# [START video_detect_text_gcs_beta]
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

operation = video_client.annotate_video(
input_uri=input_uri,
features=features)

print('\nProcessing video for text detection.')
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_gcs_beta]
return annotation_result.text_annotations
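
The start_time.seconds + start_time.nanos * 1e-9 arithmetic above converts a
protobuf Duration into float seconds. A small helper, hypothetical and not
part of the PR, makes the repeated pattern explicit:

    def duration_to_seconds(duration):
        # Hypothetical helper: protobuf Durations split time into whole
        # seconds plus nanoseconds; combine the two into float seconds.
        return duration.seconds + duration.nanos * 1e-9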


def video_detect_text(path):
# [START video_detect_text_beta]
"""Detect text in a local video."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]
video_context = videointelligence.types.VideoContext()

with io.open(path, 'rb') as f:
    input_content = f.read()

operation = video_client.annotate_video(
input_content=input_content, # the bytes of the video file
features=features,
video_context=video_context)

print('\nProcessing video for text detection.')
result = operation.result(timeout=300)

# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]

# Get only the first result
text_annotation = annotation_result.text_annotations[0]
print('\nText: {}'.format(text_annotation.text))

# Get the first text segment
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
end_time = text_segment.segment.end_time_offset
print('start_time: {}, end_time: {}'.format(
start_time.seconds + start_time.nanos * 1e-9,
end_time.seconds + end_time.nanos * 1e-9))

print('Confidence: {}'.format(text_segment.confidence))

# Show the result for the first frame in this segment.
frame = text_segment.frames[0]
time_offset = frame.time_offset
print('Time offset for the first frame: {}'.format(
time_offset.seconds + time_offset.nanos * 1e-9))
print('Rotated Bounding Box Vertices:')
for vertex in frame.rotated_bounding_box.vertices:
print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
# [END video_detect_text_beta]
return annotation_result.text_annotations


def track_objects_gcs(gcs_uri):
# [START video_object_tracking_gcs_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

# Using location_id='us-east1' is recommended for the best latency, since
# different regions run on different types of processors.
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]
operation = video_client.annotate_video(
input_uri=gcs_uri, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_gcs_beta]
return object_annotations
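
The normalized_bounding_box values printed above are fractions of the frame
dimensions in [0, 1] (the tests below assert exactly this for box.left). A
hypothetical helper, not part of the PR, for converting a box to pixels:

    def box_to_pixels(box, frame_width, frame_height):
        # Hypothetical helper: scale normalized [0, 1] coordinates to pixels.
        return (int(box.left * frame_width), int(box.top * frame_height),
                int(box.right * frame_width), int(box.bottom * frame_height))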


def track_objects(path):
# [START video_object_tracking_beta]
"""Object Tracking."""
from google.cloud import videointelligence_v1p2beta1 as videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.OBJECT_TRACKING]

with io.open(path, 'rb') as f:
    input_content = f.read()

# Using location_id='us-east1' is recommended for the best latency, since
# different regions run on different types of processors.
operation = video_client.annotate_video(
input_content=input_content, features=features, location_id='us-east1')
print('\nProcessing video for object annotations.')

result = operation.result(timeout=300)
print('\nFinished processing.\n')

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print('Entity description: {}'.format(
object_annotation.entity.description))
if object_annotation.entity.entity_id:
print('Entity id: {}'.format(object_annotation.entity.entity_id))

print('Segment: {}s to {}s'.format(
object_annotation.segment.start_time_offset.seconds +
object_annotation.segment.start_time_offset.nanos / 1e9,
object_annotation.segment.end_time_offset.seconds +
object_annotation.segment.end_time_offset.nanos / 1e9))

print('Confidence: {}'.format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print('Time offset of the first frame: {}s'.format(
frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
print('Bounding box position:')
print('\tleft : {}'.format(box.left))
print('\ttop : {}'.format(box.top))
print('\tright : {}'.format(box.right))
print('\tbottom: {}'.format(box.bottom))
print('\n')
# [END video_object_tracking_beta]
return object_annotations


if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
'transcription', help=speech_transcription.__doc__)
speech_transcription_parser.add_argument('gcs_uri')

video_text_gcs_parser = subparsers.add_parser(
'video-text-gcs', help=video_detect_text_gcs.__doc__)
video_text_gcs_parser.add_argument('gcs_uri')

video_text_parser = subparsers.add_parser(
'video-text', help=video_detect_text.__doc__)
video_text_parser.add_argument('path')

video_object_tracking_gcs_parser = subparsers.add_parser(
'track-objects-gcs', help=track_objects_gcs.__doc__)
video_object_tracking_gcs_parser.add_argument('gcs_uri')

video_object_tracking_parser = subparsers.add_parser(
'track-objects', help=track_objects.__doc__)
video_object_tracking_parser.add_argument('path')

args = parser.parse_args()

if args.command == 'transcription':
speech_transcription(args.gcs_uri)
elif args.command == 'video-text-gcs':
video_detect_text_gcs(args.gcs_uri)
elif args.command == 'video-text':
video_detect_text(args.path)
elif args.command == 'track-objects-gcs':
track_objects_gcs(args.gcs_uri)
elif args.command == 'track-objects':
track_objects(args.path)
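
The if/elif dispatch above is explicit and easy to scan. For reference,
argparse can also bind a handler to each subcommand via set_defaults; a
self-contained toy sketch of that pattern, not the PR's code:

    import argparse

    def greet(name):
        print('Hello, {}!'.format(name))

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    greet_parser = subparsers.add_parser('greet')
    greet_parser.add_argument('name')
    greet_parser.set_defaults(func=greet)  # attach the handler here

    args = parser.parse_args()
    args.func(args.name)  # dispatch without an if/elif chain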
57 changes: 57 additions & 0 deletions 57 video/cloud-client/analyze/beta_snippets_test.py
@@ -18,10 +18,67 @@

import beta_snippets

POSSIBLE_TEXTS = ['Google', 'SUR', 'ROTO', 'Vice President', '58oo9',
                  'LONDRES', 'OMAR', 'PARIS', 'METRO', 'RUE', 'CARLO']


@pytest.mark.slow
def test_speech_transcription(capsys):
beta_snippets.speech_transcription(
'gs://python-docs-samples-tests/video/googlework_short.mp4')
out, _ = capsys.readouterr()
assert 'cultural' in out


@pytest.mark.slow
def test_detect_text():
in_file = './resources/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists


@pytest.mark.slow
def test_detect_text_gcs():
in_file = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
text_annotations = beta_snippets.video_detect_text_gcs(in_file)

text_exists = False
for text_annotation in text_annotations:
for possible_text in POSSIBLE_TEXTS:
if possible_text.upper() in text_annotation.text.upper():
text_exists = True
assert text_exists
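
Both text tests use the same flag-and-loop containment check. An equivalent
formulation with any(), shown as a sketch against the same variables rather
than the PR's code:

    text_exists = any(
        possible.upper() in annotation.text.upper()
        for annotation in text_annotations
        for possible in POSSIBLE_TEXTS)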


@pytest.mark.slow
def test_track_objects():
in_file = './resources/cat.mp4'
object_annotations = beta_snippets.track_objects(in_file)

object_exists = False
for object_annotation in object_annotations:
    if 'CAT' in object_annotation.entity.description.upper():
        object_exists = True
assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0


@pytest.mark.slow
def test_track_objects_gcs():
in_file = 'gs://demomaker/cat.mp4'
object_annotations = beta_snippets.track_objects_gcs(in_file)

object_exists = False
for object_annotation in object_annotations:
    if 'CAT' in object_annotation.entity.description.upper():
        object_exists = True
assert object_exists
assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
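
All of these tests carry pytest.mark.slow, so they can be selected or skipped
by marker, assuming the repository's pytest configuration registers that
marker. A hypothetical programmatic invocation:

    import pytest

    # Hypothetical: run only the slow-marked tests in this file.
    pytest.main(['-m', 'slow', 'beta_snippets_test.py'])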
2 changes: 1 addition & 1 deletion 2 video/cloud-client/analyze/requirements.txt
@@ -1 +1 @@
google-cloud-videointelligence==1.5.0
Binary file added BIN +6.1 MB video/cloud-client/analyze/resources/cat.mp4
Binary file not shown.
Binary file not shown.