@@ -14,16 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-"""This application demonstrates face detection, face emotions
-and speech transcription using the Google Cloud API.
+"""This application demonstrates speech transcription using the
+Google Cloud API.

 Usage Examples:
-    python beta_snippets.py boxes \
-        gs://python-docs-samples-tests/video/googlework_short.mp4
-
-    python beta_snippets.py \
-        emotions gs://python-docs-samples-tests/video/googlework_short.mp4
-
     python beta_snippets.py \
         transcription gs://python-docs-samples-tests/video/googlework_short.mp4
 """
@@ -33,108 +27,6 @@
 from google.cloud import videointelligence_v1p1beta1 as videointelligence


-# [START video_face_bounding_boxes]
-def face_bounding_boxes(gcs_uri):
-    """Detects faces' bounding boxes."""
-    video_client = videointelligence.VideoIntelligenceServiceClient()
-    features = [videointelligence.enums.Feature.FACE_DETECTION]
-
-    config = videointelligence.types.FaceConfig(
-        include_bounding_boxes=True)
-    context = videointelligence.types.VideoContext(
-        face_detection_config=config)
-
-    operation = video_client.annotate_video(
-        gcs_uri, features=features, video_context=context)
-    print('\nProcessing video for face annotations:')
-
-    result = operation.result(timeout=900)
-    print('\nFinished processing.')
-
-    # There is only one result because a single video was processed.
-    faces = result.annotation_results[0].face_detection_annotations
-    for i, face in enumerate(faces):
-        print('Face {}'.format(i))
-
-        # Each face_detection_annotation has only one segment.
-        segment = face.segments[0]
-        start_time = (segment.segment.start_time_offset.seconds +
-                      segment.segment.start_time_offset.nanos / 1e9)
-        end_time = (segment.segment.end_time_offset.seconds +
-                    segment.segment.end_time_offset.nanos / 1e9)
-        positions = '{}s to {}s'.format(start_time, end_time)
-        print('\tSegment: {}\n'.format(positions))
-
-        # Each detected face may appear in many frames of the video.
-        # Here we process only the first frame.
-        frame = face.frames[0]
-
-        time_offset = (frame.time_offset.seconds +
-                       frame.time_offset.nanos / 1e9)
-        box = frame.attributes[0].normalized_bounding_box
-
-        print('First frame time offset: {}s\n'.format(time_offset))
-
-        print('First frame normalized bounding box:')
-        print('\tleft  : {}'.format(box.left))
-        print('\ttop   : {}'.format(box.top))
-        print('\tright : {}'.format(box.right))
-        print('\tbottom: {}'.format(box.bottom))
-        print('\n')
-# [END video_face_bounding_boxes]
-
-
-# [START video_face_emotions]
-def face_emotions(gcs_uri):
-    """Analyze faces' emotions over frames."""
-    video_client = videointelligence.VideoIntelligenceServiceClient()
-    features = [videointelligence.enums.Feature.FACE_DETECTION]
-
-    config = videointelligence.types.FaceConfig(
-        include_emotions=True)
-    context = videointelligence.types.VideoContext(
-        face_detection_config=config)
-
-    operation = video_client.annotate_video(
-        gcs_uri, features=features, video_context=context)
-    print('\nProcessing video for face annotations:')
-
-    result = operation.result(timeout=600)
-    print('\nFinished processing.')
-
-    # There is only one result because a single video was processed.
-    faces = result.annotation_results[0].face_detection_annotations
-    for i, face in enumerate(faces):
-        for j, frame in enumerate(face.frames):
-            time_offset = (frame.time_offset.seconds +
-                           frame.time_offset.nanos / 1e9)
-            emotions = frame.attributes[0].emotions
-
-            print('Face {}, frame {}, time_offset {}\n'.format(
-                i, j, time_offset))
-
-            # from videointelligence.enums
-            emotion_labels = (
-                'EMOTION_UNSPECIFIED', 'AMUSEMENT', 'ANGER',
-                'CONCENTRATION', 'CONTENTMENT', 'DESIRE',
-                'DISAPPOINTMENT', 'DISGUST', 'ELATION',
-                'EMBARRASSMENT', 'INTEREST', 'PRIDE', 'SADNESS',
-                'SURPRISE')
-
-            for emotion in emotions:
-                emotion_index = emotion.emotion
-                emotion_label = emotion_labels[emotion_index]
-                emotion_score = emotion.score
-
-                print('emotion: {} (confidence score: {})'.format(
-                    emotion_label, emotion_score))
-
-            print('\n')
-
-        print('\n')
-# [END video_face_emotions]
-
-
 # [START video_speech_transcription]
 def speech_transcription(input_uri):
     """Transcribe speech from a video stored on GCS."""
@@ -181,23 +73,12 @@ def speech_transcription(input_uri):
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
-    analyze_faces_parser = subparsers.add_parser(
-        'boxes', help=face_bounding_boxes.__doc__)
-    analyze_faces_parser.add_argument('gcs_uri')
-
-    analyze_emotions_parser = subparsers.add_parser(
-        'emotions', help=face_emotions.__doc__)
-    analyze_emotions_parser.add_argument('gcs_uri')

     speech_transcription_parser = subparsers.add_parser(
         'transcription', help=speech_transcription.__doc__)
     speech_transcription_parser.add_argument('gcs_uri')

     args = parser.parse_args()

-    if args.command == 'boxes':
-        face_bounding_boxes(args.gcs_uri)
-    elif args.command == 'emotions':
-        face_emotions(args.gcs_uri)
-    elif args.command == 'transcription':
+    if args.command == 'transcription':
         speech_transcription(args.gcs_uri)
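
Note: the body of the retained speech_transcription function is collapsed between the hunks above. For reference, here is a minimal sketch of how that function presumably drives the v1p1beta1 speech transcription path. The SPEECH_TRANSCRIPTION feature, SpeechTranscriptionConfig, and VideoContext wiring follow the public beta API, but the specific values (language_code='en-US', the 180-second timeout) and the printed fields are illustrative assumptions, not lines copied from this file.

# Sketch only -- not part of this change. Parameter values such as
# language_code and the result timeout are assumptions.
from google.cloud import videointelligence_v1p1beta1 as videointelligence


def transcribe_speech(input_uri):
    """Transcribe speech from a video stored on GCS (illustrative sketch)."""
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    # Configure the language of the audio track to transcribe.
    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US')
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(
        input_uri, features=features, video_context=video_context)
    print('\nProcessing video for speech transcription.')

    result = operation.result(timeout=180)

    # There is only one annotation result because one video was processed.
    annotation_results = result.annotation_results[0]
    transcription = annotation_results.speech_transcriptions[0]
    alternative = transcription.alternatives[0]

    print('Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}'.format(alternative.confidence))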