Google Cloud API.

Usage Examples:
-    python beta_snippets.py transcription \
-        gs://python-docs-samples-tests/video/googlework_tiny.mp4
-
-    python beta_snippets.py video-text-gcs \
-        gs://python-docs-samples-tests/video/googlework_tiny.mp4
-
    python beta_snippets.py streaming-labels resources/cat.mp4

    python beta_snippets.py streaming-shot-change resources/cat.mp4
import io


-def speech_transcription(input_uri, timeout=180):
-    # [START video_speech_transcription_gcs_beta]
-    """Transcribe speech from a video stored on GCS."""
-    from google.cloud import videointelligence_v1p1beta1 as videointelligence
-
-    video_client = videointelligence.VideoIntelligenceServiceClient()
-
-    features = [videointelligence.Feature.SPEECH_TRANSCRIPTION]
-
-    config = videointelligence.SpeechTranscriptionConfig(
-        language_code="en-US", enable_automatic_punctuation=True
-    )
-    video_context = videointelligence.VideoContext(speech_transcription_config=config)
-
-    operation = video_client.annotate_video(
-        request={
-            "features": features,
-            "input_uri": input_uri,
-            "video_context": video_context,
-        }
-    )
-
-    print("\nProcessing video for speech transcription.")
-
-    result = operation.result(timeout)
-
-    # There is only one annotation_result since only
-    # one video is processed.
-    annotation_results = result.annotation_results[0]
-    for speech_transcription in annotation_results.speech_transcriptions:
-        # The number of alternatives for each transcription is limited by
-        # SpeechTranscriptionConfig.max_alternatives.
-        # Each alternative is a different possible transcription
-        # and has its own confidence score.
-        for alternative in speech_transcription.alternatives:
-            print("Alternative level information:")
-
-            print("Transcript: {}".format(alternative.transcript))
-            print("Confidence: {}\n".format(alternative.confidence))
-
-            print("Word level information:")
-            for word_info in alternative.words:
-                word = word_info.word
-                start_time = word_info.start_time
-                end_time = word_info.end_time
-                print(
-                    "\t{}s - {}s: {}".format(
-                        start_time.seconds + start_time.microseconds * 1e-6,
-                        end_time.seconds + end_time.microseconds * 1e-6,
-                        word,
-                    )
-                )
-    # [END video_speech_transcription_gcs_beta]
-
-
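Note for anyone still relying on the removed `transcription` command: speech transcription is also available on the GA surface, so roughly the same call works against the stable v1 client. A minimal sketch of that migration path (my assumption, not part of this change; `transcribe_speech_v1` is a hypothetical name):

from google.cloud import videointelligence  # GA client, not the v1p1beta1 module


def transcribe_speech_v1(input_uri, timeout=600):
    """Sketch of a GA (v1) equivalent of the removed beta sample."""
    client = videointelligence.VideoIntelligenceServiceClient()
    config = videointelligence.SpeechTranscriptionConfig(
        language_code="en-US", enable_automatic_punctuation=True
    )
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.SPEECH_TRANSCRIPTION],
            "input_uri": input_uri,
            "video_context": videointelligence.VideoContext(
                speech_transcription_config=config
            ),
        }
    )
    # result() blocks until the long-running operation finishes or times out.
    result = operation.result(timeout=timeout)
    for transcription in result.annotation_results[0].speech_transcriptions:
        for alternative in transcription.alternatives:
            print(alternative.transcript, alternative.confidence)

Invocation would mirror the removed command, e.g. transcribe_speech_v1("gs://python-docs-samples-tests/video/googlework_tiny.mp4").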
-def video_detect_text_gcs(input_uri):
-    # [START video_detect_text_gcs_beta]
-    """Detect text in a video stored on GCS."""
-    from google.cloud import videointelligence_v1p2beta1 as videointelligence
-
-    video_client = videointelligence.VideoIntelligenceServiceClient()
-    features = [videointelligence.Feature.TEXT_DETECTION]
-
-    operation = video_client.annotate_video(
-        request={"features": features, "input_uri": input_uri}
-    )
-
-    print("\nProcessing video for text detection.")
-    result = operation.result(timeout=300)
-
-    # The first result is retrieved because a single video was processed.
-    annotation_result = result.annotation_results[0]
-
-    # Get only the first result
-    text_annotation = annotation_result.text_annotations[0]
-    print("\nText: {}".format(text_annotation.text))
-
-    # Get the first text segment
-    text_segment = text_annotation.segments[0]
-    start_time = text_segment.segment.start_time_offset
-    end_time = text_segment.segment.end_time_offset
-    print(
-        "start_time: {}, end_time: {}".format(
-            start_time.seconds + start_time.microseconds * 1e-6,
-            end_time.seconds + end_time.microseconds * 1e-6,
-        )
-    )
-
-    print("Confidence: {}".format(text_segment.confidence))
-
-    # Show the result for the first frame in this segment.
-    frame = text_segment.frames[0]
-    time_offset = frame.time_offset
-    print(
-        "Time offset for the first frame: {}".format(
-            time_offset.seconds + time_offset.microseconds * 1e-6
-        )
-    )
-    print("Rotated Bounding Box Vertices:")
-    for vertex in frame.rotated_bounding_box.vertices:
-        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
-    # [END video_detect_text_gcs_beta]
-    return annotation_result.text_annotations
-
-
-def video_detect_text(path):
-    # [START video_detect_text_beta]
-    """Detect text in a local video."""
-    from google.cloud import videointelligence_v1p2beta1 as videointelligence
-
-    video_client = videointelligence.VideoIntelligenceServiceClient()
-    features = [videointelligence.Feature.TEXT_DETECTION]
-    video_context = videointelligence.VideoContext()
-
-    with io.open(path, "rb") as file:
-        input_content = file.read()
-
-    operation = video_client.annotate_video(
-        request={
-            "features": features,
-            "input_content": input_content,
-            "video_context": video_context,
-        }
-    )
-
-    print("\nProcessing video for text detection.")
-    result = operation.result(timeout=300)
-
-    # The first result is retrieved because a single video was processed.
-    annotation_result = result.annotation_results[0]
-
-    # Get only the first result
-    text_annotation = annotation_result.text_annotations[0]
-    print("\nText: {}".format(text_annotation.text))
-
-    # Get the first text segment
-    text_segment = text_annotation.segments[0]
-    start_time = text_segment.segment.start_time_offset
-    end_time = text_segment.segment.end_time_offset
-    print(
-        "start_time: {}, end_time: {}".format(
-            start_time.seconds + start_time.microseconds * 1e-6,
-            end_time.seconds + end_time.microseconds * 1e-6,
-        )
-    )
-
-    print("Confidence: {}".format(text_segment.confidence))
-
-    # Show the result for the first frame in this segment.
-    frame = text_segment.frames[0]
-    time_offset = frame.time_offset
-    print(
-        "Time offset for the first frame: {}".format(
-            time_offset.seconds + time_offset.microseconds * 1e-6
-        )
-    )
-    print("Rotated Bounding Box Vertices:")
-    for vertex in frame.rotated_bounding_box.vertices:
-        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
-    # [END video_detect_text_beta]
-    return annotation_result.text_annotations
-
-
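The two removed text detection samples likewise have a GA counterpart. A minimal sketch against v1 for the GCS case (again an assumption about the migration path, not part of this change; `detect_text_v1` is a hypothetical name):

from google.cloud import videointelligence  # GA client, not the v1p2beta1 module


def detect_text_v1(input_uri, timeout=300):
    """Sketch of a GA (v1) equivalent of the removed beta text samples."""
    client = videointelligence.VideoIntelligenceServiceClient()
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.TEXT_DETECTION],
            "input_uri": input_uri,
        }
    )
    result = operation.result(timeout=timeout)
    # One annotation result per input video; print each detected text snippet
    # with the confidence of its first segment.
    for annotation in result.annotation_results[0].text_annotations:
        print(annotation.text, annotation.segments[0].confidence)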
def detect_labels_streaming(path):
    # [START video_streaming_label_detection_beta]
    from google.cloud import videointelligence_v1p3beta1 as videointelligence
@@ -826,21 +657,6 @@ def stream_generator():
    )
    subparsers = parser.add_subparsers(dest="command")

-    speech_transcription_parser = subparsers.add_parser(
-        "transcription", help=speech_transcription.__doc__
-    )
-    speech_transcription_parser.add_argument("gcs_uri")
-
-    video_text_gcs_parser = subparsers.add_parser(
-        "video-text-gcs", help=video_detect_text_gcs.__doc__
-    )
-    video_text_gcs_parser.add_argument("gcs_uri")
-
-    video_text_parser = subparsers.add_parser(
-        "video-text", help=video_detect_text.__doc__
-    )
-    video_text_parser.add_argument("path")
-
    video_streaming_labels_parser = subparsers.add_parser(
        "streaming-labels", help=detect_labels_streaming.__doc__
    )
@@ -892,13 +708,7 @@ def stream_generator():

    args = parser.parse_args()

-    if args.command == "transcription":
-        speech_transcription(args.gcs_uri)
-    elif args.command == "video-text-gcs":
-        video_detect_text_gcs(args.gcs_uri)
-    elif args.command == "video-text":
-        video_detect_text(args.path)
-    elif args.command == "streaming-labels":
+    if args.command == "streaming-labels":
        detect_labels_streaming(args.path)
    elif args.command == "streaming-shot-change":
        detect_shot_change_streaming(args.path)