From 4f08c103a4449a892c06ed614dc1f2c001f8db9e Mon Sep 17 00:00:00 2001 From: Xinjie Zheng Date: Fri, 1 Jul 2016 20:12:51 -0700 Subject: [PATCH 1/2] Adding async sample code to speech cloud --- speech/api/requirements-speech_grpc.txt | 2 +- speech/api/speech_async_grpc.py | 105 ++++++++++++++++++ speech/api/speech_async_grpc_test.py | 38 +++++++ speech/api/speech_async_rest.py | 95 ++++++++++++++++ speech/api/speech_async_rest_test.py | 23 ++++ speech/api/{speech_gcs.py => speech_grpc.py} | 0 ...speech_gcs_test.py => speech_grpc_test.py} | 4 +- 7 files changed, 264 insertions(+), 3 deletions(-) create mode 100644 speech/api/speech_async_grpc.py create mode 100644 speech/api/speech_async_grpc_test.py create mode 100644 speech/api/speech_async_rest.py create mode 100644 speech/api/speech_async_rest_test.py rename speech/api/{speech_gcs.py => speech_grpc.py} (100%) rename speech/api/{speech_gcs_test.py => speech_grpc_test.py} (94%) diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt index 443a444c5de..9a3a75a322f 100644 --- a/speech/api/requirements-speech_grpc.txt +++ b/speech/api/requirements-speech_grpc.txt @@ -1,4 +1,4 @@ gcloud==0.17.0 grpcio==0.14.0 PyAudio==0.2.9 -grpc-google-cloud-speech-v1beta1==1.0.0 +grpc-google-cloud-speech-v1beta1==1.0.1 diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py new file mode 100644 index 00000000000..3095e6f37ef --- /dev/null +++ b/speech/api/speech_async_grpc.py @@ -0,0 +1,105 @@ +#!/usr/bin/python +# Copyright (C) 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage, +using async GRPC.""" + +import argparse +import time + +from gcloud.credentials import get_credentials +from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech +from google.longrunning import operations_grpc_pb2 as operations_grpc +from grpc.beta import implementations + +# Keep the request alive for this many seconds +DEADLINE_SECS = 10 +SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' + + +def make_channel(host, port): + """Creates an SSL channel with auth credentials from the environment.""" + # In order to make an https call, use an ssl channel with defaults + ssl_channel = implementations.ssl_channel_credentials(None, None, None) + + # Grab application default credentials from the environment + creds = get_credentials().create_scoped([SPEECH_SCOPE]) + # Add a plugin to inject the creds into the header + auth_header = ( + 'Authorization', + 'Bearer ' + creds.get_access_token().access_token) + auth_plugin = implementations.metadata_call_credentials( + lambda _, cb: cb([auth_header], None), + name='google_creds') + + # compose the two together for both ssl and google auth + composite_channel = implementations.composite_channel_credentials( + ssl_channel, auth_plugin) + + return implementations.secure_channel(host, port, composite_channel) + + +def main(input_uri, encoding, sample_rate): + channel = make_channel('speech.googleapis.com', 443) + service = cloud_speech.beta_create_Speech_stub(channel) + # The method and parameters can be inferred from the proto from which the + # grpc client lib was generated. See: + # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto + response = service.AsyncRecognize(cloud_speech.AsyncRecognizeRequest( + config=cloud_speech.RecognitionConfig( + encoding=encoding, + sample_rate=sample_rate, + ), + audio=cloud_speech.RecognitionAudio( + uri=input_uri, + ) + ), DEADLINE_SECS) + # Print the longrunning operation handle. + print(response) + + # Give the server a few seconds to process. + print('Waiting 5 seconds for server processing...') + time.sleep(5) + # Construct a long running operation endpoint. + service = operations_grpc.beta_create_Operations_stub(channel) + # Get the long running operation with response. + response = service.GetOperation( + operations_grpc.GetOperationRequest(name=response.name), + DEADLINE_SECS) + # Print the recognition results. + results = cloud_speech.AsyncRecognizeResponse() + response.response.Unpack(results) + print(results) + + +def _gcs_uri(text): + if not text.startswith('gs://'): + raise ValueError( + 'Cloud Storage uri must be of the form gs://bucket/path/') + return text + + +PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' + 'google/cloud/speech/v1beta1/cloud_speech.proto') +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('input_uri', type=_gcs_uri) + parser.add_argument( + '--encoding', default='FLAC', choices=[ + 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], + help='How the audio file is encoded. See {}#L67'.format(PROTO_URL)) + parser.add_argument('--sample_rate', default=16000) + + args = parser.parse_args() + main(args.input_uri, args.encoding, args.sample_rate) diff --git a/speech/api/speech_async_grpc_test.py b/speech/api/speech_async_grpc_test.py new file mode 100644 index 00000000000..481a2610f8e --- /dev/null +++ b/speech/api/speech_async_grpc_test.py @@ -0,0 +1,38 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys + +import pytest +from speech_async_grpc import _gcs_uri +from speech_async_grpc import main + + +@pytest.mark.skipif( + sys.version_info >= (3, 0), + reason=("grpc doesn't yet support python3 " + 'https://github.com/grpc/grpc/issues/282')) +def test_main(cloud_config, capsys): + input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) + + main(input_uri, 'FLAC', 16000) + + out, err = capsys.readouterr() + assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) + + +def test_gcs_uri(): + _gcs_uri('gs://bucket/path') + with pytest.raises(ValueError): + _gcs_uri('/local/path') diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py new file mode 100644 index 00000000000..954f8e707f9 --- /dev/null +++ b/speech/api/speech_async_rest.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Speech API sample application using the REST API for async +batch processing.""" + +# [START import_libraries] +import argparse +import base64 +import json +import time + +from googleapiclient import discovery +import httplib2 +from oauth2client.client import GoogleCredentials +# [END import_libraries] + + +# [START authenticating] +DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?' + 'version={apiVersion}') + + +# Application default credentials provided by env variable +# GOOGLE_APPLICATION_CREDENTIALS +def get_speech_service(): + credentials = GoogleCredentials.get_application_default().create_scoped( + ['https://www.googleapis.com/auth/cloud-platform']) + http = httplib2.Http() + credentials.authorize(http) + + return discovery.build( + 'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) +# [END authenticating] + + +def main(speech_file): + """Transcribe the given audio file asynchronously. + + Args: + speech_file: the name of the audio file. + """ + # [START construct_request] + with open(speech_file, 'rb') as speech: + # Base64 encode the binary audio file for inclusion in the JSON + # request. + speech_content = base64.b64encode(speech.read()) + + service = get_speech_service() + service_request = service.speech().asyncrecognize( + body={ + 'config': { + 'encoding': 'LINEAR16', + 'sampleRate': 16000 + }, + 'audio': { + 'content': speech_content.decode('UTF-8') + } + }) + # [END construct_request] + # [START send_request] + response = service_request.execute() + print(json.dumps(response)) + # [END send_request] + + # Give the server a few seconds to process. + print('Waiting 5 seconds for server processing...') + time.sleep(5) + # Construct a GetOperation request. + name = response['name'] + service_request = service.operations().get(name=name) + # Get the long running operation with response. + response = service_request.execute() + print(json.dumps(response['response']['results'])) + + +# [START run_application] +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + 'speech_file', help='Full path of audio file to be recognized') + args = parser.parse_args() + main(args.speech_file) + # [END run_application] diff --git a/speech/api/speech_async_rest_test.py b/speech/api/speech_async_rest_test.py new file mode 100644 index 00000000000..d9f79e6aac5 --- /dev/null +++ b/speech/api/speech_async_rest_test.py @@ -0,0 +1,23 @@ +# Copyright 2016, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +from speech_async_rest import main + + +def test_main(resource, capsys): + main(resource('audio.raw')) + out, err = capsys.readouterr() + + assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) diff --git a/speech/api/speech_gcs.py b/speech/api/speech_grpc.py similarity index 100% rename from speech/api/speech_gcs.py rename to speech/api/speech_grpc.py diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_grpc_test.py similarity index 94% rename from speech/api/speech_gcs_test.py rename to speech/api/speech_grpc_test.py index 56f2b4a42d3..8b0ae9b6f61 100644 --- a/speech/api/speech_gcs_test.py +++ b/speech/api/speech_grpc_test.py @@ -15,8 +15,8 @@ import sys import pytest -from speech_gcs import _gcs_uri -from speech_gcs import main +from speech_grpc import _gcs_uri +from speech_grpc import main @pytest.mark.skipif( From c413af8b7d5cc0f1bc38660b01fbc31e2d577bf3 Mon Sep 17 00:00:00 2001 From: xinjie Date: Sat, 2 Jul 2016 09:26:25 -0700 Subject: [PATCH 2/2] Better waiting for long running operation --- speech/api/speech_async_grpc.py | 19 ++++++++++++------- speech/api/speech_async_rest.py | 16 ++++++++++------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py index 3095e6f37ef..55d4bfe1a19 100644 --- a/speech/api/speech_async_grpc.py +++ b/speech/api/speech_async_grpc.py @@ -68,15 +68,20 @@ def main(input_uri, encoding, sample_rate): # Print the longrunning operation handle. print(response) - # Give the server a few seconds to process. - print('Waiting 5 seconds for server processing...') - time.sleep(5) # Construct a long running operation endpoint. service = operations_grpc.beta_create_Operations_stub(channel) - # Get the long running operation with response. - response = service.GetOperation( - operations_grpc.GetOperationRequest(name=response.name), - DEADLINE_SECS) + + name = response.name + while True: + # Give the server a few seconds to process. + print('Waiting for server processing...') + time.sleep(1) + # Get the long running operation with response. + response = service.GetOperation( + operations_grpc.GetOperationRequest(name=name), DEADLINE_SECS) + if response.done: + break + # Print the recognition results. results = cloud_speech.AsyncRecognizeResponse() response.response.Unpack(results) diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py index 954f8e707f9..647a843d3ae 100644 --- a/speech/api/speech_async_rest.py +++ b/speech/api/speech_async_rest.py @@ -74,14 +74,18 @@ def main(speech_file): print(json.dumps(response)) # [END send_request] - # Give the server a few seconds to process. - print('Waiting 5 seconds for server processing...') - time.sleep(5) - # Construct a GetOperation request. name = response['name'] + # Construct a GetOperation request. service_request = service.operations().get(name=name) - # Get the long running operation with response. - response = service_request.execute() + while True: + # Give the server a few seconds to process. + print('Waiting for server processing...') + time.sleep(1) + # Get the long running operation with response. + response = service_request.execute() + if 'done' in response and response['done']: + break + print(json.dumps(response['response']['results']))