From 73bb7e08481d5f0ed9bb522b107034c7f61cd4e0 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 15:54:04 +0530 Subject: [PATCH 01/16] Initializing custom docs --- docs/concepts.md | 2 +- docs/speaker-diarization-api.md | 2 +- website/sidebars.json | 1 + website/siteConfig.js | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/concepts.md b/docs/concepts.md index d94e9ad..7094c9f 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -18,7 +18,7 @@ To link up your requests to webhooks you can pass a `webhook` parameter when mak If you return anything other than a HTTP 200 status to the webhook POST then we’ll try to deliver the response to the webhook for up to 5 times with an exponential backoff. If we don't receive a 200 response from your server, we stop delivering the response. -For testing you can create a temporary webhook using https://beeceptor.com/ +For testing you can create a temporary webhook using https://webhook.site/ To know more about webhooks visit https://simonfredsted.com/1583 diff --git a/docs/speaker-diarization-api.md b/docs/speaker-diarization-api.md index da1a2e1..b433176 100644 --- a/docs/speaker-diarization-api.md +++ b/docs/speaker-diarization-api.md @@ -175,6 +175,6 @@ audioType: can have the following values: | Parameter | Type | Description | Notes | | ---------- | ------ | -------------------------------------------------- | ----- | -| speaker_id | Number | The speaker id for the corresponding audio segment | | +| speaker_id | String | The speaker id for the corresponding audio segment | | | start | Number | Start time of the audio segment in seconds | | | end | Number | End time of the audio segment in seconds | | diff --git a/website/sidebars.json b/website/sidebars.json index 9b521f8..6628575 100755 --- a/website/sidebars.json +++ b/website/sidebars.json @@ -9,6 +9,7 @@ ], "Generic Audio Analysis": [ "speaker-diarization-api", + "speech-to-text-api", "speaker-enrollment-api", 
"speaker-identification-api", "realtime-speaker-identification-api", diff --git a/website/siteConfig.js b/website/siteConfig.js index ead27be..8f0469e 100755 --- a/website/siteConfig.js +++ b/website/siteConfig.js @@ -21,7 +21,7 @@ const users = [ ]; const siteConfig = { - title: "DeepAffects Developer Docs" /* title for your website */, + title: "" /* title for your website */, tagline: "The new standard for speech analysis APIs for developers", url: "https://docs.deepaffects.com" /* your website url */, baseUrl: "/" /* base url for your project */, From 26c1f199dec3b02f3fe19ecf8be77808c70de15c Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 15:54:33 +0530 Subject: [PATCH 02/16] Added asr api --- docs/asr-api.md | 217 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 docs/asr-api.md diff --git a/docs/asr-api.md b/docs/asr-api.md new file mode 100644 index 0000000..7673394 --- /dev/null +++ b/docs/asr-api.md @@ -0,0 +1,217 @@ +--- +id: speech-to-text-api +title: Speech-to-Text API +sidebar_label: Automatic Speech Recognition API +--- + +Automatic Speech Recognition API provides high-quality speech-to-text conversion powered by machine learning. The api also supports speaker diarization and smart punctuation to further enhance the utility of the transcribed output. 
+ +### POST Request + +`POST https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr` + +### Sample Code + +### Shell + +```shell +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json + +# contents of data.json with content +{"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "audioType": "callcenter", "enableSpeakerDiarization": true} + +# contents of data.json with url +{"url": "https://publicly-facing-url.flac", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "audioType": "callcenter", "enableSpeakerDiarization": true} +``` + +### Javascript + +```javascript +var request = require("request"); + +var options = { method: 'POST', + url: 'https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr', + qs: + { apikey: '', + webhook: '', + request_id: '' }, + headers: + { 'Content-Type': 'application/json' }, + body: + { encoding: 'FLAC', + languageCode: 'en-US', + url: 'https://publicly-facing-url.flac', + sampleRate: 8000, + enableSpeakerDiarization: true, + audioType: "callcenter" }, + json: true }; + +request(options, function (error, response, body) { + if (error) throw new Error(error); + + console.log(body); +}); + +``` + +### Python + +```python +import requests +import base64 + +url = "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr" + +querystring = {"apikey":"", "webhook":"", "request_id":""} + +payload = { + "encoding": "FLAC", + "languageCode": "en-US", + "sampleRate": 8000, + "audioType": "callcenter", + "enableSpeakerDiarization": True +} + +# The api accepts data either as a url or as base64 encoded content +# passing payload as url: +payload["url"] = "https://publicly-facing-url.flac" +# alternatively, passing payload as content: +with open(audio_file_name, 'rb') as fin: + audio_content = fin.read() +payload["content"] = 
base64.b64encode(audio_content).decode('utf-8') + +headers = { + 'Content-Type': "application/json", +} + +response = requests.post(url, json=payload, headers=headers, params=querystring) + +print(response.text) +``` + +### Output + +```shell + +# Async: + +{ +"request_id": "8bdd983a-c6bd-4159-982d-6a2471406d62", +"api": "requested_api_name" +} + +# Webhook: + +{ +"request_id": "8bdd983a-c6bd-4159-982d-6a2471406d62", +"response": { + "num_speakers": 2, + "words":[ + { + "speaker_id": "1", + "start": 0, + "end": 1, + "word": "Hi", + "confidence": 0.97 + }, + { + "speaker_id": "2", + "start": 1.2, + "end": 2, + "word": "Hello", + "confidence": 0.97 + }, + { + "speaker_id": "2", + "start": 2, + "end": 2.6, + "word": "this", + "confidence": 0.97 + }, + { + "speaker_id": "2", + "start": 2.6, + "end": 3, + "word": "is", + "confidence": 0.97 + }, + { + "speaker_id": "2", + "start": 3, + "end": 4, + "word": "Susan", + "confidence": 0.97 + } + ], + "transcript": "Hi! Hello, this is Susan." + } +} +``` + +### Body Parameters + +| Parameter | Type | Description | Notes | +| ------------ | ------ | -------------------------------------------------------- | ---------------------------- | +| encoding | String | Encoding of audio file like MP3, WAV etc. | | +| sampleRate | Number | Sample rate of the audio file. | | +| languageCode | String | Language spoken in the audio file. | [default to 'en-US'] | +| audioType | String | Type of the audio based on number of speakers | [default to callcenter] | +| content | String | base64 encoding of the audio file. 
| Optional | +| url | String | Publicly facing url | Optional | +| source | String | The source for the audio file: webex, zoom, gotomeeting, phone | Optional | +| enableSpeakerDiarization | Boolean | Tags each word corresponding to the speaker | [default to false] | + +audioType: can have the following values: + 1) callcenter + 2) meeting + 3) earningscalls + 4) interview + 5) media-broadcast + +> We recommend using callcenter when there are up to 6 speakers expected to be identified and meeting when more than 6 speakers are expected. + +> Exactly one of url and content should be passed. In case both values are passed, an error is thrown. + +> source: Adding source information enables an enhanced model which is built specifically for those audio sources. + +### Query Parameters + +| Parameter | Type | Description | Notes | +| ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | +| apikey | String | The apikey | Required for authentication inside all requests | +| webhook | String | The webhook url at which the responses will be sent | Required for async requests | +| request_id | Number | An optional unique id to link async response with the original request | Optional | + +### Output Parameters (Async) + +| Parameter | Type | Description | Notes | +| ---------- | ------ | ------------------------------- | ------------------------------------------------------------------ | +| request_id | String | The request id | This defaults to the originally sent id or is generated by the api | +| api | String | The api method which was called | | + +### Output Parameters (Webhook) + +| Parameter | Type | Description | Notes | +| ---------- | ------ | ------------------------------------ | ------------------------------------------------------------------ | +| request_id | String | The request id | This defaults to the originally sent id or is generated by the api | +| response | Object | 
The actual output of the transcription | The Transcribed object is defined below | + +#### Transcribed Object + +| Parameter | Type | Description | Notes | +| ------------ | ------ | ------------------------------- | ------------------------------------------------------------------------------- | +| num_speakers | Number | The number of speakers detected | Field is set only when `enableSpeakerDiarization` is `true` | +| words | List | List of word segments | The Word Segment is defined below | +| transcript | String | The entire transcript along with the punctuations powered by the Smart Punctuations API | | +| confidence | Number | Overall transcription confidence | | + + +#### Word Segment + +| Parameter | Type | Description | Notes | +| ---------- | ------ | -------------------------------------------------- | ----- | +| speaker_id | String | The speaker id for the corresponding audio segment | Field is set only when `enableSpeakerDiarization` is `true` | +| start | Number | Start time of the audio segment in seconds | | +| end | Number | End time of the audio segment in seconds | | +| word | String | The word corresponding to the audio segment | | +| confidence | Number | Confidence score for the word | | From 48d1ed8a2c6e64737804b97fac7363a81fe9c98f Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 16:19:51 +0530 Subject: [PATCH 03/16] make api to API --- docs/asr-api.md | 2 +- docs/audio-denoising-api.md | 4 ++-- docs/depression-prediction-api.md | 4 ++-- docs/emotion-recognition-api.md | 4 ++-- docs/interaction-analytics-api.md | 4 ++-- docs/paralinguistic-feature-extraction-api.md | 4 ++-- docs/realtime-emotion-recognition-api.md | 4 ++-- docs/realtime-speaker-identification-api.md | 4 ++-- docs/speaker-diarization-api.md | 4 ++-- docs/speaker-enrollment-api.md | 4 ++-- docs/speaker-identification-api.md | 4 ++-- docs/text-emotion-recognition-api.md | 4 ++-- docs/text-punctuation-api.md | 4 ++-- docs/voice-activity-detection-api.md | 4 
++-- 14 files changed, 27 insertions(+), 27 deletions(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index 7673394..d91004f 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -51,7 +51,6 @@ request(options, function (error, response, body) { console.log(body); }); - ``` ### Python @@ -106,6 +105,7 @@ print(response.text) "request_id": "8bdd983a-c6bd-4159-982d-6a2471406d62", "response": { "num_speakers": 2, + "confidence": 0.97, "words":[ { "speaker_id": "1", diff --git a/docs/audio-denoising-api.md b/docs/audio-denoising-api.md index 14de10e..d951520 100644 --- a/docs/audio-denoising-api.md +++ b/docs/audio-denoising-api.md @@ -1,7 +1,7 @@ --- id: audio-denoising-api -title: Audio Denoising Api -sidebar_label: Audio Denoising Api +title: Audio Denoising API +sidebar_label: Audio Denoising API --- Audio denoising api removes noise from your audio signals and returns the denoised audio clip diff --git a/docs/depression-prediction-api.md b/docs/depression-prediction-api.md index e1d49de..80b762b 100644 --- a/docs/depression-prediction-api.md +++ b/docs/depression-prediction-api.md @@ -1,7 +1,7 @@ --- id: depression-prediction-api -title: Depression Prediction Api -sidebar_label: Depression Prediction Api +title: Depression Prediction API +sidebar_label: Depression Prediction API --- Depression prediction api predicts whether the audio clip contains a depressed person diff --git a/docs/emotion-recognition-api.md b/docs/emotion-recognition-api.md index 8ad3dc6..ea8b2fe 100644 --- a/docs/emotion-recognition-api.md +++ b/docs/emotion-recognition-api.md @@ -1,7 +1,7 @@ --- id: emotion-recognition-api -title: Emotion Recognition Api -sidebar_label: Emotion Recognition Api +title: Emotion Recognition API +sidebar_label: Emotion Recognition API --- Emotion recognition api extract basic emotions from the audio file diff --git a/docs/interaction-analytics-api.md b/docs/interaction-analytics-api.md index d14ece0..370ca11 100644 --- a/docs/interaction-analytics-api.md +++ 
b/docs/interaction-analytics-api.md @@ -1,7 +1,7 @@ --- id: interaction-analytics-api -title: Interaction Analytics Api -sidebar_label: Interaction Analytics Api +title: Interaction Analytics API +sidebar_label: Interaction Analytics API --- DeepAffects Interaction Analytics API extracts comprehensive interaction based metrics from your audio data diff --git a/docs/paralinguistic-feature-extraction-api.md b/docs/paralinguistic-feature-extraction-api.md index a71557f..dbd07da 100644 --- a/docs/paralinguistic-feature-extraction-api.md +++ b/docs/paralinguistic-feature-extraction-api.md @@ -1,7 +1,7 @@ --- id: paralinguistic-feature-extraction-api -title: Paralinguistic Feature Extraction Api -sidebar_label: Paralinguistic Feature Extraction Api +title: Paralinguistic Feature Extraction API +sidebar_label: Paralinguistic Feature Extraction API --- Paralingustic feature extraction api extracts features from audio file as explained in the next section diff --git a/docs/realtime-emotion-recognition-api.md b/docs/realtime-emotion-recognition-api.md index dcc9787..fa2af56 100644 --- a/docs/realtime-emotion-recognition-api.md +++ b/docs/realtime-emotion-recognition-api.md @@ -1,7 +1,7 @@ --- id: realtime-emotion-recognition-api -title: Realtime Emotion Recognition Api -sidebar_label: Realtime Emotion Recognition Api +title: Realtime Emotion Recognition API +sidebar_label: Realtime Emotion Recognition API --- Realtime emotion recognition api Extracts basic emotions from the audio file in realtime diff --git a/docs/realtime-speaker-identification-api.md b/docs/realtime-speaker-identification-api.md index 8ca35c5..deb3bb4 100644 --- a/docs/realtime-speaker-identification-api.md +++ b/docs/realtime-speaker-identification-api.md @@ -1,7 +1,7 @@ --- id: realtime-speaker-identification-api -title: Realtime Speaker Identification Api -sidebar_label: Realtime Speaker Identification Api +title: Realtime Speaker Identification API +sidebar_label: Realtime Speaker Identification API 
--- Realtime speaker identification api identifies speakers from the audio file in realtime diff --git a/docs/speaker-diarization-api.md b/docs/speaker-diarization-api.md index b433176..88091a8 100644 --- a/docs/speaker-diarization-api.md +++ b/docs/speaker-diarization-api.md @@ -1,7 +1,7 @@ --- id: speaker-diarization-api -title: Speaker Diarization Api -sidebar_label: Speaker Diarization Api +title: Speaker Diarization API +sidebar_label: Speaker Diarization API --- Speaker diarization api tries to figure out "Who Speaks When". diff --git a/docs/speaker-enrollment-api.md b/docs/speaker-enrollment-api.md index 7282984..ae0d3a6 100644 --- a/docs/speaker-enrollment-api.md +++ b/docs/speaker-enrollment-api.md @@ -1,7 +1,7 @@ --- id: speaker-enrollment-api -title: Speaker Enrollment Api -sidebar_label: Speaker Enrollment Api +title: Speaker Enrollment API +sidebar_label: Speaker Enrollment API ---

Speaker Enrollment API for Identification (REST Api)

diff --git a/docs/speaker-identification-api.md b/docs/speaker-identification-api.md index 7386934..99c4bcb 100644 --- a/docs/speaker-identification-api.md +++ b/docs/speaker-identification-api.md @@ -1,7 +1,7 @@ --- id: speaker-identification-api -title: Speaker Identification Api -sidebar_label: Speaker Identification Api +title: Speaker Identification API +sidebar_label: Speaker Identification API --- Speaker identification api tries to figure out "Who Speaks When" for already enrolled speakers. diff --git a/docs/text-emotion-recognition-api.md b/docs/text-emotion-recognition-api.md index 697452f..63b4bd7 100644 --- a/docs/text-emotion-recognition-api.md +++ b/docs/text-emotion-recognition-api.md @@ -1,7 +1,7 @@ --- id: text-emotion-recognition-api -title: Text Emotion Recognition Api -sidebar_label: Text Emotion Recognition Api +title: Text Emotion Recognition API +sidebar_label: Text Emotion Recognition API --- Text emotion api extracts basic emotions from the text input diff --git a/docs/text-punctuation-api.md b/docs/text-punctuation-api.md index 1945cf2..f602cd8 100644 --- a/docs/text-punctuation-api.md +++ b/docs/text-punctuation-api.md @@ -1,7 +1,7 @@ --- id: text-punctuation-api -title: Smart Punctuation Api -sidebar_label: Smart Punctuation Api +title: Smart Punctuation API +sidebar_label: Smart Punctuation API --- Do you've incorrectly formatted or unformatted text? The DeepAffects Smart Punctuation takes a text blob and adds relevant punctuations to the text. The Punctuation API is specifically trained to accurately punctuate noisy asr output. 
diff --git a/docs/voice-activity-detection-api.md b/docs/voice-activity-detection-api.md index 7228528..c8bd868 100644 --- a/docs/voice-activity-detection-api.md +++ b/docs/voice-activity-detection-api.md @@ -1,7 +1,7 @@ --- id: voice-activity-detection-api -title: Voice Activity Detection Api -sidebar_label: Voice Activity Detection Api +title: Voice Activity Detection API +sidebar_label: Voice Activity Detection API --- Voice activity detection (VAD) is a technique used in speech processing to detect the presence (or absence) of human speech. The DeepAffects Voice activity detection API analyzes the audio input and returns specific segments where human speech is detected. From a1858f5b9c4bf58b0d065afc94042a43c5e42796 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 16:48:08 +0530 Subject: [PATCH 04/16] request_id is string --- docs/asr-api.md | 9 ++++----- docs/audio-denoising-api.md | 2 +- docs/depression-prediction-api.md | 2 +- docs/emotion-recognition-api.md | 2 +- docs/interaction-analytics-api.md | 2 +- docs/paralinguistic-feature-extraction-api.md | 2 +- docs/speaker-diarization-api.md | 2 +- docs/speaker-identification-api.md | 2 +- docs/text-punctuation-api.md | 2 +- docs/voice-activity-detection-api.md | 2 +- 10 files changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index d91004f..cf86b61 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -15,7 +15,7 @@ Automatic Speech Recognition API provides high-quality speech-to-text conversion ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json with content {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", 
"audioType": "callcenter", "enableSpeakerDiarization": true} @@ -33,8 +33,7 @@ var options = { method: 'POST', url: 'https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr', qs: { apikey: '', - webhook: '', - request_id: '' }, + webhook: ''}, headers: { 'Content-Type': 'application/json' }, body: @@ -61,7 +60,7 @@ import base64 url = "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} payload = { "encoding": "FLAC", @@ -180,7 +179,7 @@ audioType: can have the following values: | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git a/docs/audio-denoising-api.md b/docs/audio-denoising-api.md index d951520..07e875d 100644 --- a/docs/audio-denoising-api.md +++ b/docs/audio-denoising-api.md @@ -125,7 +125,7 @@ print(response.text) | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git 
a/docs/depression-prediction-api.md b/docs/depression-prediction-api.md index 80b762b..caae3b6 100644 --- a/docs/depression-prediction-api.md +++ b/docs/depression-prediction-api.md @@ -124,7 +124,7 @@ True | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Sync) diff --git a/docs/emotion-recognition-api.md b/docs/emotion-recognition-api.md index ea8b2fe..dac36fc 100644 --- a/docs/emotion-recognition-api.md +++ b/docs/emotion-recognition-api.md @@ -80,7 +80,7 @@ curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/recog | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters diff --git a/docs/interaction-analytics-api.md b/docs/interaction-analytics-api.md index 370ca11..c7e985c 100644 --- a/docs/interaction-analytics-api.md +++ b/docs/interaction-analytics-api.md @@ -209,7 +209,7 @@ print(response.text) | ---------- | ------ | ---------------------------------------------------------------------- | 
----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git a/docs/paralinguistic-feature-extraction-api.md b/docs/paralinguistic-feature-extraction-api.md index dbd07da..ecffca0 100644 --- a/docs/paralinguistic-feature-extraction-api.md +++ b/docs/paralinguistic-feature-extraction-api.md @@ -133,7 +133,7 @@ apiInstance.asyncFeaturizeAudio(body, webhook, callback); | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters diff --git a/docs/speaker-diarization-api.md b/docs/speaker-diarization-api.md index 88091a8..d75c1c6 100644 --- a/docs/speaker-diarization-api.md +++ b/docs/speaker-diarization-api.md @@ -148,7 +148,7 @@ audioType: can have the following values: | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional 
unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git a/docs/speaker-identification-api.md b/docs/speaker-identification-api.md index 99c4bcb..9123176 100644 --- a/docs/speaker-identification-api.md +++ b/docs/speaker-identification-api.md @@ -89,7 +89,7 @@ curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/sync/diariz | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git a/docs/text-punctuation-api.md b/docs/text-punctuation-api.md index f602cd8..c01275d 100644 --- a/docs/text-punctuation-api.md +++ b/docs/text-punctuation-api.md @@ -112,7 +112,7 @@ print(response.text) | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) diff --git a/docs/voice-activity-detection-api.md b/docs/voice-activity-detection-api.md index c8bd868..c0032d6 
100644 --- a/docs/voice-activity-detection-api.md +++ b/docs/voice-activity-detection-api.md @@ -128,7 +128,7 @@ print(response.text) | ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | | apikey | String | The apikey | Required for authentication inside all requests | | webhook | String | The webhook url at which the responses will be sent | Required for async requests | -| request_id | Number | An optional unique id to link async response with the original request | Optional | +| request_id | String | An optional unique id to link async response with the original request | Optional | ### Output Parameters (Async) From 03f3177fdb8b077edada03fa4a44ef8db9d3903e Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 16:54:24 +0530 Subject: [PATCH 05/16] Remove request_id from the sample code, only keep it in docs. --- docs/audio-denoising-api.md | 4 ++-- docs/depression-prediction-api.md | 2 +- docs/emotion-recognition-api.md | 2 +- docs/interaction-analytics-api.md | 4 ++-- docs/paralinguistic-feature-extraction-api.md | 2 +- docs/speaker-diarization-api.md | 6 +++--- docs/speaker-identification-api.md | 2 +- docs/text-punctuation-api.md | 5 ++--- docs/voice-activity-detection-api.md | 5 ++--- 9 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/audio-denoising-api.md b/docs/audio-denoising-api.md index 07e875d..69c1d50 100644 --- a/docs/audio-denoising-api.md +++ b/docs/audio-denoising-api.md @@ -44,7 +44,7 @@ apiInstance.asyncDenoiseAudio(body, webhook, callback); ```shell # async request -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/denoise?apikey=>&webhook=&request_id=abcd-1234" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/denoise?apikey=>&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json 
{"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US"} @@ -58,7 +58,7 @@ import base64 url = "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/denoise" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} payload = { "encoding": "Wave", diff --git a/docs/depression-prediction-api.md b/docs/depression-prediction-api.md index caae3b6..7a3d277 100644 --- a/docs/depression-prediction-api.md +++ b/docs/depression-prediction-api.md @@ -27,7 +27,7 @@ Depression prediction api predicts whether the audio clip contains a depressed p ```shell curl -X POST "https://proxy.api.deepaffects.com/audio/custom/ellipsis/api/v1/sync/is_depressed?apikey=" -H 'content-type: application/json' -d @data.json -curl -X POST "https://proxy.api.deepaffects.com/audio/custom/ellipsis/api/v1/async/is_depressed?apikey=&webhook=&request_id=abcd-1234" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/custom/ellipsis/api/v1/async/is_depressed?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US"} diff --git a/docs/emotion-recognition-api.md b/docs/emotion-recognition-api.md index dac36fc..fa514a7 100644 --- a/docs/emotion-recognition-api.md +++ b/docs/emotion-recognition-api.md @@ -19,7 +19,7 @@ Emotion recognition api extract basic emotions from the audio file ```shell curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/sync/recognise_emotion?apikey=" -H 'content-type: application/json' -d @data.json -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/recognise_emotion?apikey=&webhook=&request_id=abcd-1234" -H 'content-type: application/json' -d @data.json +curl -X POST 
"https://proxy.api.deepaffects.com/audio/generic/api/v2/async/recognise_emotion?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US"} diff --git a/docs/interaction-analytics-api.md b/docs/interaction-analytics-api.md index c7e985c..f72ec3b 100644 --- a/docs/interaction-analytics-api.md +++ b/docs/interaction-analytics-api.md @@ -15,7 +15,7 @@ DeepAffects Interaction Analytics API extracts comprehensive interaction based m ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/analytics/interaction?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/analytics/interaction?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US"} @@ -60,7 +60,7 @@ import base64 url = "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/analytics/interaction" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} payload = { "encoding": "FLAC", diff --git a/docs/paralinguistic-feature-extraction-api.md b/docs/paralinguistic-feature-extraction-api.md index ecffca0..4bfd8bf 100644 --- a/docs/paralinguistic-feature-extraction-api.md +++ b/docs/paralinguistic-feature-extraction-api.md @@ -24,7 +24,7 @@ There are two stages in the audio feature extraction methodology: ```shell curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/sync/featurize?apikey=" -H 'content-type: application/json' -d @data.json -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/featurize?apikey=&webhook=&request_id=abcd-1234" -H 'content-type: application/json' -d @data.json +curl -X POST 
"https://proxy.api.deepaffects.com/audio/generic/api/v1/async/featurize?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US"} diff --git a/docs/speaker-diarization-api.md b/docs/speaker-diarization-api.md index d75c1c6..516c0e0 100644 --- a/docs/speaker-diarization-api.md +++ b/docs/speaker-diarization-api.md @@ -16,10 +16,10 @@ Splits audio clip into segments corresponding to a unique speaker ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/diarize?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/diarize?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json -{"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "speakers": 2, "audioType": "callcenter"} +{"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "audioType": "callcenter"} ``` ### Javascript @@ -57,7 +57,7 @@ import base64 url = "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/diarize" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} payload = { "encoding": "Wave", diff --git a/docs/speaker-identification-api.md b/docs/speaker-identification-api.md index 9123176..ef8a014 100644 --- a/docs/speaker-identification-api.md +++ b/docs/speaker-identification-api.md @@ -24,7 +24,7 @@ Splits audio clip into segments corresponding to a unique speaker and returns st ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/async/diarization/identify?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json +curl -X POST 
"https://proxy.api.deepaffects.com/audio/generic/api/v2/async/diarization/identify?apikey=&webhook=" -H 'content-type: application/json' -d @data.json curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v2/sync/diarization/identify?apikey=" -H 'content-type: application/json' -d @data.json diff --git a/docs/text-punctuation-api.md b/docs/text-punctuation-api.md index c01275d..add1198 100644 --- a/docs/text-punctuation-api.md +++ b/docs/text-punctuation-api.md @@ -25,7 +25,7 @@ Currently, the following punctuations are added to the text: ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/text/generic/api/v1/async/punctuate?apikey=&webhook=&request_id=" -H 'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/text/generic/api/v1/async/punctuate?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"texts": ["so its more fluid than it is and you know its not the best kind of feedback right"]} @@ -39,7 +39,6 @@ var options = { method: 'POST', url: 'https://proxy.api.deepaffects.com/text/generic/api/v1/async/punctuate', qs: { apikey: '', - request_id: '', webhook: '' }, headers: { 'Content-Type': 'application/json' }, @@ -62,7 +61,7 @@ import base64 url = "https://proxy.api.deepaffects.com/text/generic/api/v1/async/punctuate" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} {"texts": ["so its more fluid than it is and you know its not the best kind of feedback right"]} diff --git a/docs/voice-activity-detection-api.md b/docs/voice-activity-detection-api.md index c0032d6..d3593ad 100644 --- a/docs/voice-activity-detection-api.md +++ b/docs/voice-activity-detection-api.md @@ -18,7 +18,7 @@ Voice activity detection (VAD) is a technique used in speech processing to detec ### Shell ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/vad?apikey=&webhook=&request_id=" -H 
'content-type: application/json' -d @data.json +curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/vad?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "minNonSpeechDuration": 1} @@ -32,7 +32,6 @@ var options = { method: 'POST', url: 'https://proxy.api.deepaffects.com/audio/generic/api/v1/async/vad', qs: { apikey: '', - request_id: '', webhook: '' }, headers: { 'Content-Type': 'application/json' }, @@ -58,7 +57,7 @@ import base64 url = "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/vad" -querystring = {"apikey":"", "webhook":"", "request_id":""} +querystring = {"apikey":"", "webhook":""} payload = { "encoding": "Wave", From 783df7278ba4b138ef3a4bcb6c1bd4558733f04e Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 17 Apr 2019 20:31:16 +0530 Subject: [PATCH 06/16] Add job status api --- docs/asr-api.md | 2 +- docs/job-status-api.md | 131 +++++++++++++++++++++++++++++++++++++++++ website/sidebars.json | 3 + 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 docs/job-status-api.md diff --git a/docs/asr-api.md b/docs/asr-api.md index cf86b61..99172df 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -140,7 +140,7 @@ print(response.text) "end": 4, "word": "Susan", "confidence": 0.97 - }, + } ], "transcript": "Hi! Hello, this is Susan." } diff --git a/docs/job-status-api.md b/docs/job-status-api.md new file mode 100644 index 0000000..58cdc03 --- /dev/null +++ b/docs/job-status-api.md @@ -0,0 +1,131 @@ +--- +id: job-status-api +title: Job Status API +sidebar_label: Job Status API +--- + +Job Status API returns information about the status of the job and its corresponding output. 
+ +### GET Request + +`GET https://proxy.api.deepaffects.com/transaction/generic/api/v1/async/status` + +### Sample Code + +### Shell + +```shell +curl -X GET \ + 'https://proxy.api.deepaffects.com/transaction/generic/api/v1/async/status?apikey=&request_id=' +``` + +### Javascript +```javascript +var request = require("request"); + +var options = { method: 'GET', + url: 'https://proxy.api.deepaffects.com/transaction/generic/api/v1/async/status', + qs: + { apikey: '', + request_id: '' }}; + +request(options, function (error, response, body) { + if (error) throw new Error(error); + + console.log(body); +}); +``` + +### Python +```python +import requests + +url = "https://proxy.api.deepaffects.com/transaction/generic/api/v1/async/status" + +querystring = {"apikey":"","request_id":""} + +payload = "" +headers = { + 'cache-control': "no-cache" + } + +response = requests.get(url, data=payload, headers=headers, params=querystring) + +print(response.text) +``` + +### Output + +```shell +{ + "response": { + "request_id": "", + "response": { + "confidence": 0.97, + "num_speakers": 2, + "transcript": "Hi! 
Hello, this is Susan.", + "words": [ + { + "confidence": 0.97, + "end": 1, + "speaker_id": "1", + "start": 0, + "word": "Hi" + }, + { + "confidence": 0.97, + "end": 2, + "speaker_id": "2", + "start": 1.2, + "word": "Hello" + }, + { + "confidence": 0.97, + "end": 2.6, + "speaker_id": "2", + "start": 2, + "word": "this" + }, + { + "confidence": 0.97, + "end": 3, + "speaker_id": "2", + "start": 2.6, + "word": "is" + }, + { + "confidence": 0.97, + "end": 4, + "speaker_id": "2", + "start": 3, + "word": "Susan" + } + ] + } + }, + "status": "Completed" +} +``` +### Query Parameters + +| Parameter | Type | Description | Notes | +| ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | +| apikey | String | The apikey | Required for authentication inside all requests | +| request_id | String | The request_id corresponding to the request | Required | + +> Note: Polling is NOT recommended in a production server. Rather, use webhooks to asynchronously receive notifications once the job completes. 
If you have any further questions, contact us at support@seernet.io + + +### Output Parameters +| Parameter | Type | Description | Notes | +| ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | +| status | String | The status of the job: `Completed`, `Failed`, `Running` | | +| response | Object | The response corresponding to the request_id | The Response Object is defined below | + + +### Response Object + +| Parameter | Type | Description | Notes | +| ---------- | ------ | ---------------------------------------------------------------------- | ----------------------------------------------- | +| request_id | String | The request_id corresponding to the request | | +| response | Object | The response object as defined in the required API type | | diff --git a/website/sidebars.json b/website/sidebars.json index 6628575..3b1dcec 100755 --- a/website/sidebars.json +++ b/website/sidebars.json @@ -7,6 +7,9 @@ "errors", "concepts" ], + "Jobs": [ + "job-status-api" + ], "Generic Audio Analysis": [ "speaker-diarization-api", "speech-to-text-api", From 82e65ebab2662813591081f48cba7d032a3732f8 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Wed, 24 Apr 2019 15:39:59 +0530 Subject: [PATCH 07/16] update the concepts --- docs/concepts.md | 15 +++++++++++++-- docs/speaker-diarization-api.md | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/concepts.md b/docs/concepts.md index 1f077cf..e806079 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -34,7 +34,7 @@ For realtime processing we've created realtime api's based on grpc which accepts DeepAffects supports various audio types for ease of integration. As a rule of thumb, we support all the audiotypes which are inherently supported by [ffmpeg](https://trac.ffmpeg.org/wiki/audio%20types) -A subset of the supported formats is specified below. 
A more exhaustive list can be fetched via `ffmpeg -formats` +A subset of the supported formats is specified as follows: 1. WAV 2. MP3 @@ -44,4 +44,15 @@ A subset of the supported formats is specified below. A more exhaustive list can 6. MP4 7. M4A 8. MOV -9. WMV (Windows Media Video) \ No newline at end of file +9. WMV (Windows Media Video) + +> A more exhaustive list can be fetched via `ffmpeg -formats`. + +### Best practices for handling audio data + +1. It is recommended to pass audio without performing any cosmetic/structural changes to the original file. Changes such as encoding/re-encoding/upsampling/downsampling/automatic gain control (AGC) cause a downstream impact on the accuracy of the results. + +2. Store the audio data in a lossless format wherever possible. Lossy audio may have a negative impact on the accuracy of the API. + +3. In case of multiple channels, DeepAffects downmixes it to a single channel during transcoding. + diff --git a/docs/speaker-diarization-api.md b/docs/speaker-diarization-api.md index f6f82ef..be1bd57 100644 --- a/docs/speaker-diarization-api.md +++ b/docs/speaker-diarization-api.md @@ -62,7 +62,7 @@ querystring = {"apikey":"", "webhook":""} payload = { "encoding": "Wave", "languageCode": "en-US", - "speakers": -1, + "speakerCount": -1, "doVad": True } From 99d768f78a23821981bd087950f649609511d7ca Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Fri, 26 Apr 2019 14:31:56 +0530 Subject: [PATCH 08/16] Add punctuation api --- docs/asr-api.md | 7 +++++-- docs/concepts.md | 1 - 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index 99172df..93e05d3 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -42,6 +42,7 @@ var options = { method: 'POST', url: 'https://publicly-facing-url.flac', sampleRate: 8000, enableSpeakerDiarization: true, + enablePunctuation: true, audioType: "callcenter" }, json: true }; @@ -67,7 +68,8 @@ payload = { "languageCode": "en-US", "sampleRate": 8000 
"audioType": "callcenter", - "enableSpeakerDiarization": true + "enableSpeakerDiarization": true, + "enablePunctuation": true } # The api accepts data either as a url or as base64 encoded content @@ -159,6 +161,7 @@ print(response.text) | url | String | Publicly facing url | Optional | | source | String | The source for the audio file: webex, zoom, gotomeeting, phone | Optional | | enableSpeakerDiarization | Boolean | Tags each word corresponding to the speaker | [default to false] | +| enablePunctuation | Boolean | Enables DeepAffects [Smart Punctuation API](./text-punctuation-api.html) | [default to true] | audioType: can have the following values: 1) callcenter @@ -201,7 +204,7 @@ audioType: can have the following values: | ------------ | ------ | ------------------------------- | ------------------------------------------------------------------------------- | | num_speakers | Number | The number of speakers detected | Field is set only when `enableSpeakerDiarization` is `true` | | words | List | List of word segments | The Word Segment is defined below | -| transcript | String | The entire transcript along with the punctuations powered by the Smart Punctuations API | | +| transcript | String | The entire transcript with/without punctuations according to the input | | | confidence | Number | Overall transcription confidence | | diff --git a/docs/concepts.md b/docs/concepts.md index e806079..7cb1a5e 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -55,4 +55,3 @@ A subset of the supported formats is specified as follows: 2. Store the audio data in a lossless format wherever possible. Lossy audio may have a negative impact on the accuracy of the API. 3. In case of multiple channels, DeepAffects downmixes it to a single channel during transcoding. 
- From 6279434547cb30affea524106ebc16815e628828 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Fri, 26 Apr 2019 14:50:25 +0530 Subject: [PATCH 09/16] Added link to punctuation api --- docs/asr-api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index 93e05d3..c4160c8 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -161,7 +161,7 @@ print(response.text) | url | String | Publicly facing url | Optional | | source | String | The source for the audio file: webex, zoom, gotomeeting, phone | Optional | | enableSpeakerDiarization | Boolean | Tags each word corresponding to the speaker | [default to false] | -| enablePunctuation | Boolean | Enables DeepAffects [Smart Punctuation API](./text-punctuation-api.html) | [default to true] | +| enablePunctuation | Boolean | Enables DeepAffects [Smart Punctuation API](text-punctuation-api.md) | [default to true] | audioType: can have the following values: 1) callcenter From 697860f972a7e087513fc5e9c8f6d13d90c5df45 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Fri, 26 Apr 2019 15:52:03 +0530 Subject: [PATCH 10/16] code blocks for asr --- docs/asr-api.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index c4160c8..f7cf181 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -12,7 +12,9 @@ Automatic Speech Recognition API provides high-quality speech-to-text conversion ### Sample Code -### Shell + + + ```shell curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=" -H 'content-type: application/json' -d @data.json @@ -24,7 +26,7 @@ curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?a {"url": "https://publicly-facing-url.flac", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "audioType": "callcenter", "enableSpeakerDiarization": true} ``` -### Javascript + ```javascript var request = require("request"); @@ 
-53,7 +55,7 @@ request(options, function (error, response, body) { }); ``` -### Python + ```python import requests @@ -88,20 +90,23 @@ response = requests.post(url, json=payload, headers=headers, params=querystring) print(response.text) ``` + ### Output -```shell + -# Async: + +```json { "request_id": "8bdd983a-c6bd-4159-982d-6a2471406d62", "api": "requested_api_name" } +``` -# Webhook: - + +```json { "request_id": "8bdd983a-c6bd-4159-982d-6a2471406d62", "response": { @@ -148,6 +153,7 @@ print(response.text) } } ``` + ### Body Parameters From 9219b91f4cdab30938de8200e1eeab15c290ae4d Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Mon, 29 Apr 2019 12:49:02 +0530 Subject: [PATCH 11/16] added multi-line curl --- docs/asr-api.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index f7cf181..b127aa0 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -17,7 +17,8 @@ Automatic Speech Recognition API provides high-quality speech-to-text conversion ```shell -curl -X POST "https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=" -H 'content-type: application/json' -d @data.json +curl -X POST \ +"https://proxy.api.deepaffects.com/audio/generic/api/v1/async/asr?apikey=&webhook=" -H 'content-type: application/json' -d @data.json # contents of data.json with content {"content": "bytesEncodedAudioString", "sampleRate": 8000, "encoding": "FLAC", "languageCode": "en-US", "audioType": "callcenter", "enableSpeakerDiarization": true} From 33ebc26db1514a77ddb5d8b0dab61659a1f579b2 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Fri, 24 May 2019 12:24:54 +0530 Subject: [PATCH 12/16] separateSpeakerPerChannel for asr api --- docs/asr-api.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/asr-api.md b/docs/asr-api.md index b127aa0..02bbc79 100644 --- a/docs/asr-api.md +++ b/docs/asr-api.md @@ -163,12 +163,13 @@ print(response.text) | encoding | String | Encoding of audio 
file like MP3, WAV etc. | | | sampleRate | Number | Sample rate of the audio file. | | | languageCode | String | Language spoken in the audio file. | [default to 'en-US'] | +| content | String | base64 encoding of the audio file. | Semi-Optional | +| url | String | Publicly facing url | Semi-Optional | | audioType | String | Type of the audio based on number of speakers | [default to callcenter] | -| content | String | base64 encoding of the audio file. | Optional | -| url | String | Publicly facing url | Optional | | source | String | The source for the audio file: webex, zoom, gotomeeting, phone | Optional | -| enableSpeakerDiarization | Boolean | Tags each word corresponding to the speaker | [default to false] | | enablePunctuation | Boolean | Enables DeepAffects [Smart Punctuation API](text-punctuation-api.md) | [default to true] | +| enableSpeakerDiarization | Boolean | Tags each word corresponding to the speaker | [default to False] | +| separateSpeakerPerChannel | Boolean | Set to `True` if the input audio is multi-channel and each channel has a separate speaker | [default to False]. The value is used if `enableSpeakerDiarization` is set to `True`. | audioType: can have the following values: 1) callcenter From e7771984b4a2b4aab4706a62f7d76d23e4ab01a3 Mon Sep 17 00:00:00 2001 From: Sushant Hiray Date: Fri, 31 May 2019 11:37:24 +0530 Subject: [PATCH 13/16] Use code blocks for job status api --- docs/job-status-api.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/job-status-api.md b/docs/job-status-api.md index 58cdc03..ed4dc43 100644 --- a/docs/job-status-api.md +++ b/docs/job-status-api.md @@ -12,14 +12,16 @@ Job Status API returns information about the status of the job and its correspon ### Sample Code -### Shell + + + ```shell curl -X GET \ 'https://proxy.api.deepaffects.com/transaction/generic/api/v1/async/status?apikey=&request_id=' ``` -### Javascript + ```javascript var request = require("request"); @@ -36,7 +38,7 @@ 
request(options, function (error, response, body) { }); ``` -### Python + ```python import requests @@ -53,6 +55,7 @@ response = requests.get(url, data=payload, headers=headers, params=querystring) print(response.text) ``` + ### Output From 08a9552e70b0890b29abc9d7f5e1ea9c9bd54d70 Mon Sep 17 00:00:00 2001 From: prashantkukde Date: Wed, 1 Jul 2020 19:25:59 -0700 Subject: [PATCH 14/16] Fixed the description Fixed the description --- docs/introduction.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/introduction.md b/docs/introduction.md index d661bd2..c3c3405 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -4,8 +4,8 @@ title: Introduction sidebar_label: Introduction --- -Welcome to the DeepAffects API! You can use this API to access all our API endpoints, such as the Audio Emotion API to analyze the emotion content of the audio clip, or the Denoising API to remove background noise from your audio clip. +Welcome to the DeepAffects API! The DeepAffects API exposes many of the audio, text & video recognition & analytics capabilities, to empower you to develop speech-enabled applications. The Developer portal provides a variety of resources for working with the DeepAffects REST API, and example components you can use to jump-start your integration. -The Sync and Async API is organized around [REST](http://en.wikipedia.org/wiki/Representational_State_Transfer). All requests should be made over SSL. All request and response bodies, including errors, are encoded in JSON. +The DeepAffects APIs are organized around [REST](http://en.wikipedia.org/wiki/Representational_State_Transfer) with Sync and Async options. All requests should be made over SSL. All request and response bodies, including errors, are encoded in JSON. The Realtime API is organized around [GRPC](http://grpc.io). All requests should be made using deepaffects client libraries. 
From b66189e4d567f7dec166cb9cab42c1659c3fa7d2 Mon Sep 17 00:00:00 2001 From: Sanchit Garg Date: Thu, 2 Jul 2020 08:43:45 +0530 Subject: [PATCH 15/16] rearranging side bars. moved jobs after apis. putting interaction api on top --- website/sidebars.json | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/website/sidebars.json b/website/sidebars.json index 0b980f4..b335769 100755 --- a/website/sidebars.json +++ b/website/sidebars.json @@ -7,21 +7,17 @@ "errors", "concepts" ], - "Jobs": [ - "job-status-api" - ], "Generic Audio Analysis": [ + "interaction-analytics-api", "speaker-diarization-api", "speech-to-text-api", - "interaction-analytics-api", "speaker-enrollment-api", "speaker-identification-api", "realtime-speaker-identification-api", "voice-activity-detection-api", "emotion-recognition-api", "realtime-emotion-recognition-api", - "audio-denoising-api", - "paralinguistic-feature-extraction-api" + "audio-denoising-api" ], "Generic Video Analysis": [ "chapter-detection-api" @@ -31,6 +27,9 @@ "text-punctuation-api", "text-playbook-group-api", "text-playbook-api" + ], + "Jobs": [ + "job-status-api" ] } } From b7ed2f5a4d58a221d17c3fc15aa8ac72803c33bc Mon Sep 17 00:00:00 2001 From: Sanchit Garg Date: Sat, 4 Jul 2020 01:08:08 +0530 Subject: [PATCH 16/16] Update introduction.md fixing the merge --- docs/introduction.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/introduction.md b/docs/introduction.md index 11b33c1..c3c3405 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -4,11 +4,7 @@ title: Introduction sidebar_label: Introduction --- -<<<<<<< HEAD Welcome to the DeepAffects API! The DeepAffects API exposes many of the audio, text & video recognition & analytics capabilities, to empower you to develop speech-enabled applications. The Developer portal provides a variety of resources for working with the DeepAffects REST API, and example components you can use to jump-start your integration. 
-======= -Welcome to the DeepAffects API! You can use this API to access all our API endpoints, such as the Interaction Analytics API for end to end conversation analysis, or Audio Emotion API to analyze the emotion content of the audio clip. ->>>>>>> origin/master The DeepAffects APIs are organized around [REST](http://en.wikipedia.org/wiki/Representational_State_Transfer) with Sync and Async options. All requests should be made over SSL. All request and response bodies, including errors, are encoded in JSON.