-
Notifications
You must be signed in to change notification settings - Fork 6.6k
Multi Channel #1561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Multi Channel #1561
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,20 +22,23 @@ | |
python beta_snippets.py metadata resources/commercial_mono.wav | ||
python beta_snippets.py punctuation resources/commercial_mono.wav | ||
python beta_snippets.py diarization resources/commercial_mono.wav | ||
python beta_snippets.py multi-channel resources/commercial_mono.wav | ||
""" | ||
|
||
import argparse | ||
import io | ||
|
||
from google.cloud import speech_v1p1beta1 as speech | ||
|
||
|
||
# [START speech_transcribe_file_with_enhanced_model] | ||
def transcribe_file_with_enhanced_model(path): | ||
def transcribe_file_with_enhanced_model(speech_file): | ||
"""Transcribe the given audio file using an enhanced model.""" | ||
# [START speech_transcribe_file_with_enhanced_model] | ||
from google.cloud import speech_v1p1beta1 as speech | ||
client = speech.SpeechClient() | ||
|
||
with io.open(path, 'rb') as audio_file: | ||
# TODO(developer): Uncomment and set to a path to your audio file. | ||
# speech_file = 'path/to/file.wav' | ||
|
||
with io.open(speech_file, 'rb') as audio_file: | ||
content = audio_file.read() | ||
|
||
audio = speech.types.RecognitionAudio(content=content) | ||
|
@@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path): | |
print('-' * 20) | ||
print('First alternative of result {}'.format(i)) | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
# [END speech_transcribe_file_with_enhanced_model] | ||
# [END speech_transcribe_file_with_enhanced_model] | ||
|
||
|
||
# [START speech_transcribe_file_with_metadata] | ||
def transcribe_file_with_metadata(path): | ||
def transcribe_file_with_metadata(speech_file): | ||
"""Send a request that includes recognition metadata.""" | ||
# [START speech_transcribe_file_with_metadata] | ||
from google.cloud import speech_v1p1beta1 as speech | ||
client = speech.SpeechClient() | ||
|
||
with io.open(path, 'rb') as audio_file: | ||
# TODO(developer): Uncomment and set to a path to your audio file. | ||
# speech_file = 'path/to/file.wav' | ||
|
||
with io.open(speech_file, 'rb') as audio_file: | ||
content = audio_file.read() | ||
|
||
# Here we construct a recognition metadata object. | ||
|
@@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path): | |
print('-' * 20) | ||
print('First alternative of result {}'.format(i)) | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
# [END speech_transcribe_file_with_metadata] | ||
# [END speech_transcribe_file_with_metadata] | ||
|
||
|
||
# [START speech_transcribe_file_with_auto_punctuation] | ||
def transcribe_file_with_auto_punctuation(path): | ||
def transcribe_file_with_auto_punctuation(speech_file): | ||
"""Transcribe the given audio file with auto punctuation enabled.""" | ||
# [START speech_transcribe_file_with_auto_punctuation] | ||
from google.cloud import speech_v1p1beta1 as speech | ||
client = speech.SpeechClient() | ||
|
||
with io.open(path, 'rb') as audio_file: | ||
# TODO(developer): Uncomment and set to a path to your audio file. | ||
# speech_file = 'path/to/file.wav' | ||
|
||
with io.open(speech_file, 'rb') as audio_file: | ||
content = audio_file.read() | ||
|
||
audio = speech.types.RecognitionAudio(content=content) | ||
|
@@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path): | |
print('-' * 20) | ||
print('First alternative of result {}'.format(i)) | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
# [END speech_transcribe_file_with_auto_punctuation] | ||
# [END speech_transcribe_file_with_auto_punctuation] | ||
|
||
|
||
# [START speech_transcribe_diarization] | ||
def transcribe_file_with_diarization(path): | ||
def transcribe_file_with_diarization(speech_file): | ||
"""Transcribe the given audio file synchronously with diarization.""" | ||
# [START speech_transcribe_diarization] | ||
from google.cloud import speech_v1p1beta1 as speech | ||
client = speech.SpeechClient() | ||
|
||
with open(path, 'rb') as audio_file: | ||
# TODO(developer): Uncomment and set to a path to your audio file. | ||
# speech_file = 'path/to/file.wav' | ||
|
||
with open(speech_file, 'rb') as audio_file: | ||
content = audio_file.read() | ||
|
||
audio = speech.types.RecognitionAudio(content=content) | ||
|
@@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path): | |
.format(i, alternative.transcript)) | ||
print('Speaker Tag for the first word: {}' | ||
.format(alternative.words[0].speaker_tag)) | ||
# [END speech_transcribe_diarization] | ||
# [END speech_transcribe_diarization] | ||
|
||
|
||
def transcribe_file_with_multichannel(speech_file, audio_channel_count=1):
    """Transcribe a local audio file synchronously, recognizing each channel
    separately.

    The request declares the audio as LINEAR16 at 16000 Hz in US English and
    enables separate recognition per channel. For every result returned, the
    transcript of the first alternative and the result's channel tag are
    printed to stdout.

    Args:
        speech_file: Path to the local audio file to transcribe.
        audio_channel_count: Number of channels declared in the request.
            Defaults to 1, the value this sample previously hard-coded, so
            existing callers behave exactly as before. Pass the real channel
            count when transcribing multichannel audio.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    # TODO(developer): Uncomment and set to the number of channels in your
    # audio file.
    # audio_channel_count = 2

    with open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        audio_channel_count=audio_channel_count,
        enable_separate_recognition_per_channel=True)

    response = client.recognize(config, audio)

    # Each result is reported together with its own channel tag, so output
    # for different channels arrives as separate results rather than as
    # extra alternatives of one result (NOTE(review): confirm against the
    # Speech-to-Text API reference for `channel_tag`).
    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(i))
        print(u'Transcript: {}'.format(alternative.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens when there are multiple channels? Do we get the output from all channels in the same alternative, or does the output from the second channel go to the second alternative, and so on? |
||
# [END speech_transcribe_multichannel] | ||
|
||
|
||
if __name__ == '__main__': | ||
|
@@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path): | |
transcribe_file_with_auto_punctuation(args.path) | ||
elif args.command == 'diarization': | ||
transcribe_file_with_diarization(args.path) | ||
elif args.command == 'multi-channel': | ||
transcribe_file_with_multichannel(args.path) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,8 @@ | |
transcribe_file_with_auto_punctuation, | ||
transcribe_file_with_diarization, | ||
transcribe_file_with_enhanced_model, | ||
transcribe_file_with_metadata) | ||
transcribe_file_with_metadata, | ||
transcribe_file_with_multichannel) | ||
|
||
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') | ||
|
||
|
@@ -52,3 +53,11 @@ def test_transcribe_diarization(capsys): | |
out, err = capsys.readouterr() | ||
|
||
assert 'OK Google stream stranger things from Netflix to my TV' in out | ||
|
||
|
||
def test_transcribe_multichannel_file(capsys):
    """Run the multichannel sample on a bundled recording and check that the
    expected transcript appears in the captured stdout."""
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')
    transcribe_file_with_multichannel(audio_path)

    captured_stdout, _ = capsys.readouterr()

    expected_transcript = (
        'OK Google stream stranger things from Netflix to my TV')
    assert expected_transcript in captured_stdout
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a single-channel audio file; use an actual multichannel test file. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see this uses the region tags differently from how they have commonly been used in Python so far. Is this the convention going forward?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this is a result of our most recent samples rubric working group. I'll be presenting on this in a team meeting post-Next.