Commit 93124e0

dizcology authored and andrewsg committed

Face detection beta features (GoogleCloudPlatform#1414)

1 parent c24722f commit 93124e0
File tree: 3 files changed, 253 additions and 1 deletion
beta_snippets.py: 203 additions & 0 deletions
@@ -0,0 +1,203 @@
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates face detection, face emotions
and speech transcription using the Google Cloud Video Intelligence API.

Usage Examples:
    python beta_snippets.py boxes \
        gs://python-docs-samples-tests/video/googlework_short.mp4

    python beta_snippets.py \
        emotions gs://python-docs-samples-tests/video/googlework_short.mp4

    python beta_snippets.py \
        transcription gs://python-docs-samples-tests/video/googlework_short.mp4
"""

import argparse

from google.cloud import videointelligence_v1p1beta1 as videointelligence


# [START video_face_bounding_boxes]
def face_bounding_boxes(gcs_uri):
    """ Detects faces' bounding boxes. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.FACE_DETECTION]

    config = videointelligence.types.FaceConfig(
        include_bounding_boxes=True)
    context = videointelligence.types.VideoContext(
        face_detection_config=config)

    operation = video_client.annotate_video(
        gcs_uri, features=features, video_context=context)
    print('\nProcessing video for face annotations:')

    result = operation.result(timeout=900)
    print('\nFinished processing.')

    # There is only one result because a single video was processed.
    faces = result.annotation_results[0].face_detection_annotations
    for i, face in enumerate(faces):
        print('Face {}'.format(i))

        # Each face_detection_annotation has only one segment.
        segment = face.segments[0]
        start_time = (segment.segment.start_time_offset.seconds +
                      segment.segment.start_time_offset.nanos / 1e9)
        end_time = (segment.segment.end_time_offset.seconds +
                    segment.segment.end_time_offset.nanos / 1e9)
        positions = '{}s to {}s'.format(start_time, end_time)
        print('\tSegment: {}\n'.format(positions))

        # Each detected face may appear in many frames of the video.
        # Here we process only the first frame.
        frame = face.frames[0]

        time_offset = (frame.time_offset.seconds +
                       frame.time_offset.nanos / 1e9)
        box = frame.attributes[0].normalized_bounding_box

        print('First frame time offset: {}s\n'.format(time_offset))

        print('First frame normalized bounding box:')
        print('\tleft : {}'.format(box.left))
        print('\ttop : {}'.format(box.top))
        print('\tright : {}'.format(box.right))
        print('\tbottom: {}'.format(box.bottom))
        print('\n')
# [END video_face_bounding_boxes]


# [START video_face_emotions]
def face_emotions(gcs_uri):
    """ Analyze faces' emotions over frames. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.FACE_DETECTION]

    config = videointelligence.types.FaceConfig(
        include_emotions=True)
    context = videointelligence.types.VideoContext(
        face_detection_config=config)

    operation = video_client.annotate_video(
        gcs_uri, features=features, video_context=context)
    print('\nProcessing video for face annotations:')

    result = operation.result(timeout=600)
    print('\nFinished processing.')

    # There is only one result because a single video was processed.
    faces = result.annotation_results[0].face_detection_annotations
    for i, face in enumerate(faces):
        for j, frame in enumerate(face.frames):
            time_offset = (frame.time_offset.seconds +
                           frame.time_offset.nanos / 1e9)
            emotions = frame.attributes[0].emotions

            print('Face {}, frame {}, time_offset {}\n'.format(
                i, j, time_offset))

            # from videointelligence.enums
            emotion_labels = (
                'EMOTION_UNSPECIFIED', 'AMUSEMENT', 'ANGER',
                'CONCENTRATION', 'CONTENTMENT', 'DESIRE',
                'DISAPPOINTMENT', 'DISGUST', 'ELATION',
                'EMBARRASSMENT', 'INTEREST', 'PRIDE', 'SADNESS',
                'SURPRISE')

            for emotion in emotions:
                emotion_index = emotion.emotion
                emotion_label = emotion_labels[emotion_index]
                emotion_score = emotion.score

                print('emotion: {} (confidence score: {})'.format(
                    emotion_label, emotion_score))

            print('\n')

        print('\n')
# [END video_face_emotions]


# [START video_speech_transcription]
def speech_transcription(input_uri):
    """Transcribe speech from a video stored on GCS."""
    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US')
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(
        input_uri, features=features,
        video_context=video_context)

    print('\nProcessing video for speech transcription.')

    result = operation.result(timeout=180)

    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]
    speech_transcription = annotation_results.speech_transcriptions[0]
    alternative = speech_transcription.alternatives[0]

    print('Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}\n'.format(alternative.confidence))

    print('Word level information:')
    for word_info in alternative.words:
        word = word_info.word
        start_time = word_info.start_time
        end_time = word_info.end_time
        print('\t{}s - {}s: {}'.format(
            start_time.seconds + start_time.nanos * 1e-9,
            end_time.seconds + end_time.nanos * 1e-9,
            word))
# [END video_speech_transcription]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    analyze_faces_parser = subparsers.add_parser(
        'boxes', help=face_bounding_boxes.__doc__)
    analyze_faces_parser.add_argument('gcs_uri')

    analyze_emotions_parser = subparsers.add_parser(
        'emotions', help=face_emotions.__doc__)
    analyze_emotions_parser.add_argument('gcs_uri')

    speech_transcription_parser = subparsers.add_parser(
        'transcription', help=speech_transcription.__doc__)
    speech_transcription_parser.add_argument('gcs_uri')

    args = parser.parse_args()

    if args.command == 'boxes':
        face_bounding_boxes(args.gcs_uri)
    elif args.command == 'emotions':
        face_emotions(args.gcs_uri)
    elif args.command == 'transcription':
        speech_transcription(args.gcs_uri)
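
For quick experimentation, the snippet functions above can also be called directly from Python rather than through the argparse command line. A minimal sketch, assuming the module is saved as beta_snippets.py (as in its usage examples) and the public sample video is readable with the caller's Google Cloud credentials:

# Sketch only: call the snippets directly instead of via the CLI.
# Assumes beta_snippets.py (above) is importable and that default
# credentials with Video Intelligence access are configured.
import beta_snippets

SAMPLE_URI = 'gs://python-docs-samples-tests/video/googlework_short.mp4'

beta_snippets.face_bounding_boxes(SAMPLE_URI)   # segment times and first-frame boxes
beta_snippets.face_emotions(SAMPLE_URI)         # per-frame emotion labels and scores
beta_snippets.speech_transcription(SAMPLE_URI)  # transcript, confidence, word timings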
Tests for the snippets: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
#!/usr/bin/env python

# Copyright 2017 Google, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pytest

import beta_snippets


BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
FACES_SHORT_FILE_PATH = 'video/googlework_short.mp4'


@pytest.mark.slow
def test_face_bounding_boxes(capsys):
    beta_snippets.face_bounding_boxes(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'top :' in out


@pytest.mark.slow
def test_face_emotions(capsys):
    beta_snippets.face_emotions(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'CONCENTRATION' in out


@pytest.mark.slow
def test_speech_transcription(capsys):
    beta_snippets.speech_transcription(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'cultural' in out
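
These tests are marked slow and read the target bucket from the CLOUD_STORAGE_BUCKET environment variable. A minimal sketch of a local run, assuming the tests above live in beta_snippets_test.py (a hypothetical filename, not shown in this diff) and the bucket contains video/googlework_short.mp4:

# Sketch only: run the slow-marked tests programmatically.
# The filename beta_snippets_test.py and the bucket name are assumptions.
import os
import pytest

os.environ.setdefault('CLOUD_STORAGE_BUCKET', 'my-test-bucket')
pytest.main(['-m', 'slow', 'beta_snippets_test.py'])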
Dependency pin: 1 addition & 1 deletion
@@ -1 +1 @@
-google-cloud-videointelligence==1.0.1
+google-cloud-videointelligence==1.1.0

The pin on google-cloud-videointelligence is raised from 1.0.1 to 1.1.0, presumably so the samples can import the videointelligence_v1p1beta1 client used above.
