Commit fd586cb

feat(api): responses x eval api

1 parent: f26c5fc

24 files changed, +645 −1097 lines

.stats.yml (+3 −3)

@@ -1,4 +1,4 @@
 configured_endpoints: 101
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-794a6ed3c3d3d77887564755168056af8a426b17cf1ec721e3a300503dc22a41.yml
-openapi_spec_hash: 25a81c220713cd5b0bafc221d1dfa79a
-config_hash: 0b768ed1b56c6d82816f0fa40dc4aaf5
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-161ca7f1cfd7b33c1fc07d0ce25dfe4be5a7271c394f4cb526b7fb21b0729900.yml
+openapi_spec_hash: 602e14add4bee018c6774e320ce309b8
+config_hash: 7da27f7260075e8813ddcea542fba1bf

api.md (+2 −0)

@@ -787,6 +787,7 @@ Types:
 ```python
 from openai.types import (
     EvalCustomDataSourceConfig,
+    EvalLogsDataSourceConfig,
     EvalStoredCompletionsDataSourceConfig,
     EvalCreateResponse,
     EvalRetrieveResponse,
@@ -812,6 +813,7 @@ Types:
 from openai.types.evals import (
     CreateEvalCompletionsRunDataSource,
     CreateEvalJSONLRunDataSource,
+    CreateEvalResponsesRunDataSource,
     EvalAPIError,
     RunCreateResponse,
     RunRetrieveResponse,

src/openai/resources/audio/transcriptions.py (+93 −1)

@@ -57,6 +57,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
@@ -118,6 +119,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -152,6 +154,11 @@ def create(

               Note: Streaming is not supported for the `whisper-1` model and will be ignored.

+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -200,6 +207,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -234,6 +242,11 @@ def create(

               Note: Streaming is not supported for the `whisper-1` model and will be ignored.

+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -281,6 +294,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -299,6 +313,7 @@ def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,
@@ -357,6 +372,8 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -369,7 +386,68 @@ async def create(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription: ...
+    ) -> TranscriptionCreateResponse:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+        """

     @overload
     async def create(
@@ -418,6 +496,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -452,6 +531,11 @@ async def create(

               Note: Streaming is not supported for the `whisper-1` model and will be ignored.

+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -500,6 +584,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -534,6 +619,11 @@ async def create(

               Note: Streaming is not supported for the `whisper-1` model and will be ignored.

+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -581,6 +671,7 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -599,6 +690,7 @@ async def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,

src/openai/resources/embeddings.py (+8 −6)

@@ -66,11 +66,12 @@ def create(
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.

           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
@@ -181,11 +182,12 @@ async def create(
           input: Input text to embed, encoded as a string or array of tokens. To embed multiple
               inputs in a single request, pass an array of strings or array of token arrays.
               The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
+              all embedding models), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens. Some models may also impose a limit on total number of
-              tokens summed across inputs.
+              for counting tokens. In addition to the per-input token limit, all embedding
+              models enforce a maximum of 300,000 tokens summed across all inputs in a single
+              request.

           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
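
The updated docstring states a hard cap of 300,000 tokens summed across all inputs per request, on top of the 8192-token per-input limit. A rough pre-flight check with tiktoken might look like the sketch below; the input texts are placeholders, and `cl100k_base` is assumed as the tokenizer used by the current embedding models.

```python
import tiktoken

# Placeholder inputs; in practice this would be your batch of documents.
inputs = ["first document", "second document"]

enc = tiktoken.get_encoding("cl100k_base")
token_counts = [len(enc.encode(text)) for text in inputs]

# Per-input limit (8192 tokens) and the request-wide 300,000-token cap
# described in the updated docstring.
assert max(token_counts) <= 8192, "an input exceeds the per-input token limit"
assert sum(token_counts) <= 300_000, "batch exceeds the request-wide token limit"
```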

src/openai/types/__init__.py (+1 −0)

@@ -70,6 +70,7 @@
 from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams
 from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams
 from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig as EvalLogsDataSourceConfig
 from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
 from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse
 from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions

src/openai/types/audio/transcription_create_params.py (+37 −1)

@@ -3,7 +3,7 @@
 from __future__ import annotations

 from typing import List, Union, Optional
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Literal, Required, TypeAlias, TypedDict

 from ..._types import FileTypes
 from ..audio_model import AudioModel
@@ -12,6 +12,8 @@

 __all__ = [
     "TranscriptionCreateParamsBase",
+    "ChunkingStrategy",
+    "ChunkingStrategyVadConfig",
     "TranscriptionCreateParamsNonStreaming",
     "TranscriptionCreateParamsStreaming",
 ]
@@ -31,6 +33,15 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     (which is powered by our open source Whisper V2 model).
     """

+    chunking_strategy: Optional[ChunkingStrategy]
+    """Controls how the audio is cut into chunks.
+
+    When set to `"auto"`, the server first normalizes loudness and then uses voice
+    activity detection (VAD) to choose boundaries. `server_vad` object can be
+    provided to tweak VAD detection parameters manually. If unset, the audio is
+    transcribed as a single block.
+    """
+
     include: List[TranscriptionInclude]
     """Additional information to include in the transcription response.

@@ -82,6 +93,31 @@ class TranscriptionCreateParamsBase(TypedDict, total=False):
     """


+class ChunkingStrategyVadConfig(TypedDict, total=False):
+    type: Required[Literal["server_vad"]]
+    """Must be set to `server_vad` to enable manual chunking using server side VAD."""
+
+    prefix_padding_ms: int
+    """Amount of audio to include before the VAD detected speech (in milliseconds)."""
+
+    silence_duration_ms: int
+    """
+    Duration of silence to detect speech stop (in milliseconds). With shorter values
+    the model will respond more quickly, but may jump in on short pauses from the
+    user.
+    """
+
+    threshold: float
+    """Sensitivity threshold (0.0 to 1.0) for voice activity detection.
+
+    A higher threshold will require louder audio to activate the model, and thus
+    might perform better in noisy environments.
+    """
+
+
+ChunkingStrategy: TypeAlias = Union[Literal["auto"], ChunkingStrategyVadConfig]
+
+
 class TranscriptionCreateParamsNonStreaming(TranscriptionCreateParamsBase, total=False):
     stream: Optional[Literal[False]]
     """

src/openai/types/embedding_create_params.py (+5 −4)

@@ -16,11 +16,12 @@ class EmbeddingCreateParams(TypedDict, total=False):

     To embed multiple inputs in a single request, pass an array of strings or array
     of token arrays. The input must not exceed the max input tokens for the model
-    (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
-    array must be 2048 dimensions or less.
+    (8192 tokens for all embedding models), cannot be an empty string, and any array
+    must be 2048 dimensions or less.
     [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens. Some models may also impose a limit on total number of
-    tokens summed across inputs.
+    for counting tokens. In addition to the per-input token limit, all embedding
+    models enforce a maximum of 300,000 tokens summed across all inputs in a single
+    request.
     """

     model: Required[Union[str, EmbeddingModel]]

src/openai/types/eval_create_params.py (+12 −3)

@@ -16,6 +16,7 @@
     "EvalCreateParams",
     "DataSourceConfig",
     "DataSourceConfigCustom",
+    "DataSourceConfigLogs",
     "DataSourceConfigStoredCompletions",
     "TestingCriterion",
     "TestingCriterionLabelModel",
@@ -65,15 +66,23 @@ class DataSourceConfigCustom(TypedDict, total=False):
     """


+class DataSourceConfigLogs(TypedDict, total=False):
+    type: Required[Literal["logs"]]
+    """The type of data source. Always `logs`."""
+
+    metadata: Dict[str, object]
+    """Metadata filters for the logs data source."""
+
+
 class DataSourceConfigStoredCompletions(TypedDict, total=False):
-    type: Required[Literal["stored_completions"]]
-    """The type of data source. Always `stored_completions`."""
+    type: Required[Literal["stored-completions"]]
+    """The type of data source. Always `stored-completions`."""

     metadata: Dict[str, object]
     """Metadata filters for the stored completions data source."""


-DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigStoredCompletions]
+DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]


 class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
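
For illustration, an eval created against the new `logs` data source config might look roughly like the sketch below; the metadata filter and the string-check grader are placeholders, not values taken from this commit.

```python
from openai import OpenAI

client = OpenAI()

# A sketch only: the metadata filter and grader below are illustrative.
evaluation = client.evals.create(
    name="responses-quality",
    data_source_config={
        "type": "logs",  # the new DataSourceConfigLogs shape
        "metadata": {"environment": "production"},
    },
    testing_criteria=[
        {
            "type": "string_check",
            "name": "non-empty output",
            "input": "{{sample.output_text}}",
            "operation": "ne",
            "reference": "",
        }
    ],
)
print(evaluation.id)
```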

src/openai/types/eval_create_response.py (+3 −1)

@@ -10,6 +10,7 @@
 from .graders.label_model_grader import LabelModelGrader
 from .graders.score_model_grader import ScoreModelGrader
 from .graders.string_check_grader import StringCheckGrader
+from .eval_logs_data_source_config import EvalLogsDataSourceConfig
 from .eval_custom_data_source_config import EvalCustomDataSourceConfig
 from .graders.text_similarity_grader import TextSimilarityGrader
 from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
@@ -24,7 +25,8 @@
 ]

 DataSourceConfig: TypeAlias = Annotated[
-    Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+    Union[EvalCustomDataSourceConfig, EvalLogsDataSourceConfig, EvalStoredCompletionsDataSourceConfig],
+    PropertyInfo(discriminator="type"),
 ]
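
Because the union is discriminated on `type`, the SDK parses `data_source_config` into the matching class. Continuing the hypothetical create example above, a narrow-and-check might look like this.

```python
from openai.types import EvalLogsDataSourceConfig

# `evaluation` is the object returned by client.evals.create(...) in the
# earlier sketch; the discriminator on `type` selects the parsed class.
if isinstance(evaluation.data_source_config, EvalLogsDataSourceConfig):
    print("this eval reads from stored response logs")
```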
