Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Multi Channel #1561

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 1 speech/cloud-client/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ To run this sample:
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav

positional arguments:
command
Expand Down
86 changes: 68 additions & 18 deletions 86 speech/cloud-client/beta_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,23 @@
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
"""

import argparse
import io

from google.cloud import speech_v1p1beta1 as speech


# [START speech_transcribe_file_with_enhanced_model]
def transcribe_file_with_enhanced_model(path):
def transcribe_file_with_enhanced_model(speech_file):
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_file_with_enhanced_model]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this is a different way to use the region tags than commonly done in Python so far. Is this the convention going forward?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is a result of our most recent samples rubric working group. I'll be presenting on this in a team meeting post-Next.

from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_enhanced_model]
# [END speech_transcribe_file_with_enhanced_model]


# [START speech_transcribe_file_with_metadata]
def transcribe_file_with_metadata(path):
def transcribe_file_with_metadata(speech_file):
"""Send a request that includes recognition metadata."""
# [START speech_transcribe_file_with_metadata]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

# Here we construct a recognition metadata object.
Expand Down Expand Up @@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_metadata]
# [END speech_transcribe_file_with_metadata]


# [START speech_transcribe_file_with_auto_punctuation]
def transcribe_file_with_auto_punctuation(path):
def transcribe_file_with_auto_punctuation(speech_file):
"""Transcribe the given audio file with auto punctuation enabled."""
# [START speech_transcribe_file_with_auto_punctuation]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with io.open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path):
print('-' * 20)
print('First alternative of result {}'.format(i))
print('Transcript: {}'.format(alternative.transcript))
# [END speech_transcribe_file_with_auto_punctuation]
# [END speech_transcribe_file_with_auto_punctuation]


# [START speech_transcribe_diarization]
def transcribe_file_with_diarization(path):
def transcribe_file_with_diarization(speech_file):
"""Transcribe the given audio file synchronously with diarization."""
# [START speech_transcribe_diarization]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

with open(path, 'rb') as audio_file:
# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
Expand All @@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path):
.format(i, alternative.transcript))
print('Speaker Tag for the first word: {}'
.format(alternative.words[0].speaker_tag))
# [END speech_transcribe_diarization]
# [END speech_transcribe_diarization]


def transcribe_file_with_multichannel(speech_file):
    """Synchronously transcribe a local audio file with separate
    recognition per channel enabled.

    Prints, for every result in the response, the transcript of its first
    alternative followed by the result's channel tag.

    Args:
        speech_file: Path of the audio file whose bytes are sent for
            recognition.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    # The whole file is read into memory and sent inline with the request.
    with open(speech_file, 'rb') as wav_handle:
        raw_audio = wav_handle.read()
    audio = speech.types.RecognitionAudio(content=raw_audio)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    response = client.recognize(config, audio)

    divider = '-' * 20
    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_choice = result.alternatives[0]
        print(divider)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(top_choice.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens when there are multiple channels? Do we get the output from all channels in the same alternative, or does the output from the second channel go to the second alternative, and so on?

# [END speech_transcribe_multichannel]


if __name__ == '__main__':
Expand All @@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path):
transcribe_file_with_auto_punctuation(args.path)
elif args.command == 'diarization':
transcribe_file_with_diarization(args.path)
elif args.command == 'multi-channel':
transcribe_file_with_multichannel(args.path)
11 changes: 10 additions & 1 deletion 11 speech/cloud-client/beta_snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
transcribe_file_with_auto_punctuation,
transcribe_file_with_diarization,
transcribe_file_with_enhanced_model,
transcribe_file_with_metadata)
transcribe_file_with_metadata,
transcribe_file_with_multichannel)

RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')

Expand Down Expand Up @@ -52,3 +53,11 @@ def test_transcribe_diarization(capsys):
out, err = capsys.readouterr()

assert 'OK Google stream stranger things from Netflix to my TV' in out


def test_transcribe_multichannel_file(capsys):
    """Run the multichannel sample on Google_Gnome.wav and check that the
    expected transcript appears in the captured standard output."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_multichannel(audio_path)

    # Only stdout is inspected; stderr is captured but ignored.
    printed, _ = capsys.readouterr()

    expected = 'OK Google stream stranger things from Netflix to my TV'
    assert expected in printed
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a single-channel audio file; use an actual multichannel test file.

Morty Proxy This is a proxified and sanitized view of the page, visit original site.