Commit 466b44d
Dev/add usage details to Usage class (#726)
PR to enhance the `Usage` object and related logic, to support more granular token accounting, matching the details available in the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). Specifically, it:

- Adds `input_tokens_details` and `output_tokens_details` fields to the `Usage` dataclass, storing detailed token breakdowns (e.g., `cached_tokens`, `reasoning_tokens`).
- Flows this change through.
- Updates and extends tests to match.
- Adds a test for the `Usage.add` method.

### Motivation

- Aligns the SDK's usage with the latest OpenAI Responses API `Usage` object.
- Supports downstream use cases that require fine-grained token usage data (e.g., billing, analytics, optimization) requested by startups.

Co-authored-by: Wulfie Bain <wulfie@openai.com>
1 parent 428c9a6 commit 466b44d

11 files changed: +178 -15 lines changed
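
For orientation, a minimal sketch of what the enhanced `Usage` object carries. The field names come from this diff; the example values are illustrative only:

```python
from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

from agents.usage import Usage

# Illustrative values: 60 of 100 prompt tokens were served from the prompt
# cache, and 25 of 40 completion tokens were reasoning tokens.
usage = Usage(
    requests=1,
    input_tokens=100,
    input_tokens_details=InputTokensDetails(cached_tokens=60),
    output_tokens=40,
    output_tokens_details=OutputTokensDetails(reasoning_tokens=25),
    total_tokens=140,
)
```

`Usage.add` aggregates the detail fields alongside the totals, as exercised by the new `tests/test_usage.py` below.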

src/agents/extensions/models/litellm_model.py (+11, -0)

@@ -6,6 +6,7 @@
 from typing import Any, Literal, cast, overload
 
 import litellm.types
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents.exceptions import ModelBehaviorError
 
@@ -107,6 +108,16 @@ async def get_response(
                     input_tokens=response_usage.prompt_tokens,
                     output_tokens=response_usage.completion_tokens,
                     total_tokens=response_usage.total_tokens,
+                    input_tokens_details=InputTokensDetails(
+                        cached_tokens=getattr(
+                            response_usage.prompt_tokens_details, "cached_tokens", 0
+                        )
+                    ),
+                    output_tokens_details=OutputTokensDetails(
+                        reasoning_tokens=getattr(
+                            response_usage.completion_tokens_details, "reasoning_tokens", 0
+                        )
+                    ),
                 )
                 if response.usage
                 else Usage()
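
Note the `getattr(..., 0)` calls above: providers reached through LiteLLM may return usage objects whose `prompt_tokens_details` / `completion_tokens_details` are missing or `None`, in which case the attribute lookup falls back to the default. A minimal illustration of that fallback:

```python
# If the details object is None (or lacks the field entirely), getattr
# returns the default instead of raising AttributeError.
prompt_tokens_details = None  # e.g. a provider that reports no breakdown
cached = getattr(prompt_tokens_details, "cached_tokens", 0)
assert cached == 0
```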

src/agents/models/openai_chatcompletions.py (+14, -1)

@@ -9,6 +9,7 @@
 from openai.types import ChatModel
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.responses import Response
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from .. import _debug
 from ..agent_output import AgentOutputSchemaBase
@@ -83,6 +84,18 @@ async def get_response(
                     input_tokens=response.usage.prompt_tokens,
                     output_tokens=response.usage.completion_tokens,
                     total_tokens=response.usage.total_tokens,
+                    input_tokens_details=InputTokensDetails(
+                        cached_tokens=getattr(
+                            response.usage.prompt_tokens_details, "cached_tokens", 0
+                        )
+                        or 0,
+                    ),
+                    output_tokens_details=OutputTokensDetails(
+                        reasoning_tokens=getattr(
+                            response.usage.completion_tokens_details, "reasoning_tokens", 0
+                        )
+                        or 0,
+                    ),
                 )
                 if response.usage
                 else Usage()
@@ -252,7 +265,7 @@ async def _fetch_response(
             stream_options=self._non_null_or_not_given(stream_options),
             store=self._non_null_or_not_given(store),
             reasoning_effort=self._non_null_or_not_given(reasoning_effort),
-            extra_headers={ **HEADERS, **(model_settings.extra_headers or {}) },
+            extra_headers={**HEADERS, **(model_settings.extra_headers or {})},
             extra_query=model_settings.extra_query,
             extra_body=model_settings.extra_body,
             metadata=self._non_null_or_not_given(model_settings.metadata),
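
The Chat Completions path adds an extra `or 0` on top of the `getattr` fallback: here the details object can exist while the field itself is `None`, and `or 0` normalizes that case so `Usage` always holds integers. A small sketch of the difference, using a stand-in object:

```python
from types import SimpleNamespace

# The details object exists, but the field is explicitly None.
details = SimpleNamespace(cached_tokens=None)

print(getattr(details, "cached_tokens", 0))       # None: attribute exists, so no fallback
print(getattr(details, "cached_tokens", 0) or 0)  # 0: `or 0` normalizes the None
```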

src/agents/models/openai_responses.py (+2, -0)

@@ -98,6 +98,8 @@ async def get_response(
                 input_tokens=response.usage.input_tokens,
                 output_tokens=response.usage.output_tokens,
                 total_tokens=response.usage.total_tokens,
+                input_tokens_details=response.usage.input_tokens_details,
+                output_tokens_details=response.usage.output_tokens_details,
             )
             if response.usage
             else Usage()

src/agents/run.py (+2, -0)

@@ -689,6 +689,8 @@ async def _run_single_turn_streamed(
                            input_tokens=event.response.usage.input_tokens,
                            output_tokens=event.response.usage.output_tokens,
                            total_tokens=event.response.usage.total_tokens,
+                            input_tokens_details=event.response.usage.input_tokens_details,
+                            output_tokens_details=event.response.usage.output_tokens_details,
                        )
                        if event.response.usage
                        else Usage()

src/agents/usage.py (+21, -1)

@@ -1,4 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 
 @dataclass
@@ -9,9 +11,18 @@ class Usage:
     input_tokens: int = 0
     """Total input tokens sent, across all requests."""
 
+    input_tokens_details: InputTokensDetails = field(
+        default_factory=lambda: InputTokensDetails(cached_tokens=0)
+    )
+    """Details about the input tokens, matching responses API usage details."""
     output_tokens: int = 0
     """Total output tokens received, across all requests."""
 
+    output_tokens_details: OutputTokensDetails = field(
+        default_factory=lambda: OutputTokensDetails(reasoning_tokens=0)
+    )
+    """Details about the output tokens, matching responses API usage details."""
+
     total_tokens: int = 0
     """Total tokens sent and received, across all requests."""
 
@@ -20,3 +31,12 @@ def add(self, other: "Usage") -> None:
         self.input_tokens += other.input_tokens if other.input_tokens else 0
         self.output_tokens += other.output_tokens if other.output_tokens else 0
         self.total_tokens += other.total_tokens if other.total_tokens else 0
+        self.input_tokens_details = InputTokensDetails(
+            cached_tokens=self.input_tokens_details.cached_tokens
+            + other.input_tokens_details.cached_tokens
+        )
+
+        self.output_tokens_details = OutputTokensDetails(
+            reasoning_tokens=self.output_tokens_details.reasoning_tokens
+            + other.output_tokens_details.reasoning_tokens
+        )
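
Because the new fields use `default_factory`, a bare `Usage()` already carries zeroed detail objects, so `add` can sum them without any `None` checks. This is what the `test_usage_add_aggregates_with_none_values` test below relies on:

```python
from agents.usage import Usage

# Fresh Usage objects start with zeroed detail breakdowns by default.
u = Usage()
assert u.input_tokens_details.cached_tokens == 0
assert u.output_tokens_details.reasoning_tokens == 0
```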

tests/models/test_litellm_chatcompletions_stream.py (+14, -2)

@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> None:
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=6),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
     assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 6
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 2
 
 
 @pytest.mark.allow_call_model_methods

tests/test_extra_headers.py (+14, -6)

@@ -1,6 +1,7 @@
 import pytest
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIChatCompletionsModel, OpenAIResponsesModel
 
@@ -17,21 +18,29 @@ class DummyResponses:
         async def create(self, **kwargs):
             nonlocal called_kwargs
             called_kwargs = kwargs
+
             class DummyResponse:
                 id = "dummy"
                 output = []
                 usage = type(
-                    "Usage", (), {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+                    "Usage",
+                    (),
+                    {
+                        "input_tokens": 0,
+                        "output_tokens": 0,
+                        "total_tokens": 0,
+                        "input_tokens_details": InputTokensDetails(cached_tokens=0),
+                        "output_tokens_details": OutputTokensDetails(reasoning_tokens=0),
+                    },
                 )()
+
             return DummyResponse()
 
     class DummyClient:
         def __init__(self):
             self.responses = DummyResponses()
 
-
-
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,
@@ -47,7 +56,6 @@ def __init__(self):
     assert called_kwargs["extra_headers"]["X-Test-Header"] == "test-value"
 
 
-
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 async def test_extra_headers_passed_to_openai_client():
@@ -76,7 +84,7 @@ def __init__(self):
         self.chat = type("_Chat", (), {"completions": DummyCompletions()})()
         self.base_url = "https://api.openai.com"
 
-    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,

tests/test_openai_chatcompletions.py (+15, -2)

@@ -13,7 +13,10 @@
     ChatCompletionMessageToolCall,
     Function,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -51,7 +54,13 @@ async def test_get_response_with_text_message(monkeypatch) -> None:
         model="fake",
         object="chat.completion",
         choices=[choice],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            # completion_tokens_details left blank to test default
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=3),
+        ),
     )
 
     async def patched_fetch_response(self, *args, **kwargs):
@@ -81,6 +90,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.input_tokens == 7
     assert resp.usage.output_tokens == 5
     assert resp.usage.total_tokens == 12
+    assert resp.usage.input_tokens_details.cached_tokens == 3
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
     assert resp.response_id is None
 
 
@@ -127,6 +138,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.requests == 0
     assert resp.usage.input_tokens == 0
     assert resp.usage.output_tokens == 0
+    assert resp.usage.input_tokens_details.cached_tokens == 0
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
 
 
 @pytest.mark.allow_call_model_methods

tests/test_openai_chatcompletions_stream.py (+14, -2)

@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> None:
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=2),
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=3),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
     assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 2
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 3
 
 
 @pytest.mark.allow_call_model_methods

tests/test_responses_tracing.py (+19, -1)

@@ -1,7 +1,10 @@
+from typing import Optional
+
 import pytest
 from inline_snapshot import snapshot
 from openai import AsyncOpenAI
 from openai.types.responses import ResponseCompletedEvent
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace
 from agents.tracing.span_data import ResponseSpanData
@@ -16,10 +19,25 @@ def is_disabled(self):
 
 
 class DummyUsage:
-    def __init__(self, input_tokens=1, output_tokens=1, total_tokens=2):
+    def __init__(
+        self,
+        input_tokens: int = 1,
+        input_tokens_details: Optional[InputTokensDetails] = None,
+        output_tokens: int = 1,
+        output_tokens_details: Optional[OutputTokensDetails] = None,
+        total_tokens: int = 2,
+    ):
         self.input_tokens = input_tokens
         self.output_tokens = output_tokens
         self.total_tokens = total_tokens
+        self.input_tokens_details = (
+            input_tokens_details if input_tokens_details else InputTokensDetails(cached_tokens=0)
+        )
+        self.output_tokens_details = (
+            output_tokens_details
+            if output_tokens_details
+            else OutputTokensDetails(reasoning_tokens=0)
+        )
 
 
 class DummyResponse:

tests/test_usage.py (+52, -0, new file)

@@ -0,0 +1,52 @@
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
+
+from agents.usage import Usage
+
+
+def test_usage_add_aggregates_all_fields():
+    u1 = Usage(
+        requests=1,
+        input_tokens=10,
+        input_tokens_details=InputTokensDetails(cached_tokens=3),
+        output_tokens=20,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
+        total_tokens=30,
+    )
+    u2 = Usage(
+        requests=2,
+        input_tokens=7,
+        input_tokens_details=InputTokensDetails(cached_tokens=4),
+        output_tokens=8,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
+        total_tokens=15,
+    )
+
+    u1.add(u2)
+
+    assert u1.requests == 3
+    assert u1.input_tokens == 17
+    assert u1.output_tokens == 28
+    assert u1.total_tokens == 45
+    assert u1.input_tokens_details.cached_tokens == 7
+    assert u1.output_tokens_details.reasoning_tokens == 11
+
+
+def test_usage_add_aggregates_with_none_values():
+    u1 = Usage()
+    u2 = Usage(
+        requests=2,
+        input_tokens=7,
+        input_tokens_details=InputTokensDetails(cached_tokens=4),
+        output_tokens=8,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
+        total_tokens=15,
+    )
+
+    u1.add(u2)
+
+    assert u1.requests == 2
+    assert u1.input_tokens == 7
+    assert u1.output_tokens == 8
+    assert u1.total_tokens == 15
+    assert u1.input_tokens_details.cached_tokens == 4
+    assert u1.output_tokens_details.reasoning_tokens == 6
