diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index 5e287ebbd6..5f89155092 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -547,7 +547,7 @@ async def _run_stream( # noqa: C901 async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa: C901 text = '' tool_calls: list[_messages.ToolCallPart] = [] - thinking_parts: list[_messages.ThinkingPart] = [] + invisible_parts: bool = False for part in self.model_response.parts: if isinstance(part, _messages.TextPart): @@ -558,11 +558,13 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa # Text parts before a built-in tool call are essentially thoughts, # not part of the final result output, so we reset the accumulated text text = '' + invisible_parts = True yield _messages.BuiltinToolCallEvent(part) # pyright: ignore[reportDeprecated] elif isinstance(part, _messages.BuiltinToolReturnPart): + invisible_parts = True yield _messages.BuiltinToolResultEvent(part) # pyright: ignore[reportDeprecated] elif isinstance(part, _messages.ThinkingPart): - thinking_parts.append(part) + invisible_parts = True else: assert_never(part) @@ -570,43 +572,51 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa # In the future, we'd consider making this configurable at the agent or run level. # This accounts for cases like anthropic returns that might contain a text response # and a tool call response, where the text response just indicates the tool call will happen. - if tool_calls: - async for event in self._handle_tool_calls(ctx, tool_calls): - yield event - elif text: - # No events are emitted during the handling of text responses, so we don't need to yield anything - self._next_node = await self._handle_text_response(ctx, text) - elif thinking_parts: - # handle thinking-only responses (responses that contain only ThinkingPart instances) - # this can happen with models that support thinking mode when they don't provide - # actionable output alongside their thinking content. - self._next_node = ModelRequestNode[DepsT, NodeRunEndT]( - _messages.ModelRequest( - parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')] + try: + if tool_calls: + async for event in self._handle_tool_calls(ctx, tool_calls): + yield event + elif text: + # No events are emitted during the handling of text responses, so we don't need to yield anything + self._next_node = await self._handle_text_response(ctx, text) + elif invisible_parts: + # handle responses with only thinking or built-in tool parts. + # this can happen with models that support thinking mode when they don't provide + # actionable output alongside their thinking content. so we tell the model to try again. + m = _messages.RetryPromptPart( + content='Responses without text or tool calls are not permitted.', ) - ) - else: - # we got an empty response with no tool calls, text, or thinking - # this sometimes happens with anthropic (and perhaps other models) - # when the model has already returned text along side tool calls - # in this scenario, if text responses are allowed, we return text from the most recent model - # response, if any - if isinstance(ctx.deps.output_schema, _output.TextOutputSchema): - for message in reversed(ctx.state.message_history): - if isinstance(message, _messages.ModelResponse): - text = '' - for part in message.parts: - if isinstance(part, _messages.TextPart): - text += part.content - elif isinstance(part, _messages.BuiltinToolCallPart): - # Text parts before a built-in tool call are essentially thoughts, - # not part of the final result output, so we reset the accumulated text - text = '' # pragma: no cover - if text: - self._next_node = await self._handle_text_response(ctx, text) - return - - raise exceptions.UnexpectedModelBehavior('Received empty model response') + raise ToolRetryError(m) + else: + # we got an empty response with no tool calls, text, thinking, or built-in tool calls. + # this sometimes happens with anthropic (and perhaps other models) + # when the model has already returned text along side tool calls + # in this scenario, if text responses are allowed, we return text from the most recent model + # response, if any + if isinstance(ctx.deps.output_schema, _output.TextOutputSchema): + for message in reversed(ctx.state.message_history): + if isinstance(message, _messages.ModelResponse): + text = '' + for part in message.parts: + if isinstance(part, _messages.TextPart): + text += part.content + elif isinstance(part, _messages.BuiltinToolCallPart): + # Text parts before a built-in tool call are essentially thoughts, + # not part of the final result output, so we reset the accumulated text + text = '' # pragma: no cover + if text: + self._next_node = await self._handle_text_response(ctx, text) + return + + # Go back to the model request node with an empty request, which means we'll essentially + # resubmit the most recent request that resulted in an empty response, + # as the empty response and request will not create any items in the API payload, + # in the hope the model will return a non-empty response this time. + ctx.state.increment_retries(ctx.deps.max_result_retries) + self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[])) + except ToolRetryError as e: + ctx.state.increment_retries(ctx.deps.max_result_retries, e) + self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry])) self._events_iterator = _run_stream() @@ -666,23 +676,19 @@ async def _handle_text_response( text: str, ) -> ModelRequestNode[DepsT, NodeRunEndT] | End[result.FinalResult[NodeRunEndT]]: output_schema = ctx.deps.output_schema - try: - run_context = build_run_context(ctx) - if isinstance(output_schema, _output.TextOutputSchema): - result_data = await output_schema.process(text, run_context) - else: - m = _messages.RetryPromptPart( - content='Plain text responses are not permitted, please include your response in a tool call', - ) - raise ToolRetryError(m) + run_context = build_run_context(ctx) - for validator in ctx.deps.output_validators: - result_data = await validator.validate(result_data, run_context) - except ToolRetryError as e: - ctx.state.increment_retries(ctx.deps.max_result_retries, e) - return ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry])) + if isinstance(output_schema, _output.TextOutputSchema): + result_data = await output_schema.process(text, run_context) else: - return self._handle_final_result(ctx, result.FinalResult(result_data), []) + m = _messages.RetryPromptPart( + content='Plain text responses are not permitted, please include your response in a tool call', + ) + raise ToolRetryError(m) + + for validator in ctx.deps.output_validators: + result_data = await validator.validate(result_data, run_context) + return self._handle_final_result(ctx, result.FinalResult(result_data), []) __repr__ = dataclasses_no_defaults_repr diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index f1d3d4e02e..b70f541262 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -259,7 +259,8 @@ def __init__( name: The name of the agent, used for logging. If `None`, we try to infer the agent name from the call frame when the agent is first run. model_settings: Optional model request settings to use for this agent's runs, by default. - retries: The default number of retries to allow before raising an error. + retries: The default number of retries to allow for tool calls and output validation, before raising an error. + For model request retries, see the [HTTP Request Retries](../retries.md) documentation. output_retries: The maximum number of retries to allow for output validation, defaults to `retries`. tools: Tools to register with the agent, you can also register tools via the decorators [`@agent.tool`][pydantic_ai.Agent.tool] and [`@agent.tool_plain`][pydantic_ai.Agent.tool_plain]. diff --git a/pydantic_ai_slim/pydantic_ai/format_prompt.py b/pydantic_ai_slim/pydantic_ai/format_prompt.py index 42e6b4ac6f..2c403ec8e9 100644 --- a/pydantic_ai_slim/pydantic_ai/format_prompt.py +++ b/pydantic_ai_slim/pydantic_ai/format_prompt.py @@ -1,15 +1,17 @@ from __future__ import annotations as _annotations from collections.abc import Iterable, Iterator, Mapping -from dataclasses import asdict, dataclass, is_dataclass +from dataclasses import asdict, dataclass, field, fields, is_dataclass from datetime import date -from typing import Any +from typing import Any, Literal from xml.etree import ElementTree from pydantic import BaseModel __all__ = ('format_as_xml',) +from pydantic.fields import ComputedFieldInfo, FieldInfo + def format_as_xml( obj: Any, @@ -17,6 +19,7 @@ def format_as_xml( item_tag: str = 'item', none_str: str = 'null', indent: str | None = ' ', + include_field_info: Literal['once'] | bool = False, ) -> str: """Format a Python object as XML. @@ -33,6 +36,10 @@ def format_as_xml( for dataclasses and Pydantic models. none_str: String to use for `None` values. indent: Indentation string to use for pretty printing. + include_field_info: Whether to include attributes like Pydantic `Field` attributes and dataclasses `field()` + `metadata` as XML attributes. In both cases the allowed `Field` attributes and `field()` metadata keys are + `title` and `description`. If a field is repeated in the data (e.g. in a list) by setting `once` + the attributes are included only in the first occurrence of an XML element relative to the same field. Returns: XML representation of the object. @@ -51,7 +58,12 @@ def format_as_xml( ''' ``` """ - el = _ToXml(item_tag=item_tag, none_str=none_str).to_xml(obj, root_tag) + el = _ToXml( + data=obj, + item_tag=item_tag, + none_str=none_str, + include_field_info=include_field_info, + ).to_xml(root_tag) if root_tag is None and el.text is None: join = '' if indent is None else '\n' return join.join(_rootless_xml_elements(el, indent)) @@ -63,11 +75,26 @@ def format_as_xml( @dataclass class _ToXml: + data: Any item_tag: str none_str: str - - def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element: - element = ElementTree.Element(self.item_tag if tag is None else tag) + include_field_info: Literal['once'] | bool + # a map of Pydantic and dataclasses Field paths to their metadata: + # a field unique string representation and its class + _fields_info: dict[str, tuple[str, FieldInfo | ComputedFieldInfo]] = field(default_factory=dict) + # keep track of fields we have extracted attributes from + _included_fields: set[str] = field(default_factory=set) + # keep track of class names for dataclasses and Pydantic models, that occur in lists + _element_names: dict[str, str] = field(default_factory=dict) + # flag for parsing dataclasses and Pydantic models once + _is_info_extracted: bool = False + _FIELD_ATTRIBUTES = ('title', 'description') + + def to_xml(self, tag: str | None = None) -> ElementTree.Element: + return self._to_xml(value=self.data, path='', tag=tag) + + def _to_xml(self, value: Any, path: str, tag: str | None = None) -> ElementTree.Element: + element = self._create_element(self.item_tag if tag is None else tag, path) if value is None: element.text = self.none_str elif isinstance(value, str): @@ -79,31 +106,96 @@ def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element: elif isinstance(value, date): element.text = value.isoformat() elif isinstance(value, Mapping): - self._mapping_to_xml(element, value) # pyright: ignore[reportUnknownArgumentType] + if tag is None and path in self._element_names: + element.tag = self._element_names[path] + self._mapping_to_xml(element, value, path) # pyright: ignore[reportUnknownArgumentType] elif is_dataclass(value) and not isinstance(value, type): + self._init_structure_info() if tag is None: - element = ElementTree.Element(value.__class__.__name__) - dc_dict = asdict(value) - self._mapping_to_xml(element, dc_dict) + element.tag = value.__class__.__name__ + self._mapping_to_xml(element, asdict(value), path) elif isinstance(value, BaseModel): + self._init_structure_info() if tag is None: - element = ElementTree.Element(value.__class__.__name__) - self._mapping_to_xml(element, value.model_dump(mode='python')) + element.tag = value.__class__.__name__ + # by dumping the model we loose all metadata in nested data structures, + # but we have collected it when called _init_structure_info + self._mapping_to_xml(element, value.model_dump(), path) elif isinstance(value, Iterable): - for item in value: # pyright: ignore[reportUnknownVariableType] - item_el = self.to_xml(item, None) - element.append(item_el) + for n, item in enumerate(value): # pyright: ignore[reportUnknownVariableType,reportUnknownArgumentType] + element.append(self._to_xml(value=item, path=f'{path}.[{n}]' if path else f'[{n}]')) else: raise TypeError(f'Unsupported type for XML formatting: {type(value)}') return element - def _mapping_to_xml(self, element: ElementTree.Element, mapping: Mapping[Any, Any]) -> None: + def _create_element(self, tag: str, path: str) -> ElementTree.Element: + element = ElementTree.Element(tag) + if path in self._fields_info: + field_repr, field_info = self._fields_info[path] + if self.include_field_info and self.include_field_info != 'once' or field_repr not in self._included_fields: + field_attributes = self._extract_attributes(field_info) + for k, v in field_attributes.items(): + element.set(k, v) + self._included_fields.add(field_repr) + return element + + def _init_structure_info(self): + """Create maps with all data information (fields info and class names), if not already created.""" + if not self._is_info_extracted: + self._parse_data_structures(self.data) + self._is_info_extracted = True + + def _mapping_to_xml( + self, + element: ElementTree.Element, + mapping: Mapping[Any, Any], + path: str = '', + ) -> None: for key, value in mapping.items(): if isinstance(key, int): key = str(key) elif not isinstance(key, str): raise TypeError(f'Unsupported key type for XML formatting: {type(key)}, only str and int are allowed') - element.append(self.to_xml(value, key)) + element.append(self._to_xml(value=value, path=f'{path}.{key}' if path else key, tag=key)) + + def _parse_data_structures( + self, + value: Any, + path: str = '', + ): + """Parse data structures as dataclasses or Pydantic models to extract element names and attributes.""" + if value is None or isinstance(value, (str | int | float | date | bytearray | bytes | bool)): + return + elif isinstance(value, Mapping): + for k, v in value.items(): # pyright: ignore[reportUnknownVariableType] + self._parse_data_structures(v, f'{path}.{k}' if path else f'{k}') + elif is_dataclass(value) and not isinstance(value, type): + self._element_names[path] = value.__class__.__name__ + for field in fields(value): + new_path = f'{path}.{field.name}' if path else field.name + if self.include_field_info and field.metadata: + attributes = {k: v for k, v in field.metadata.items() if k in self._FIELD_ATTRIBUTES} + if attributes: + field_repr = f'{value.__class__.__name__}.{field.name}' + self._fields_info[new_path] = (field_repr, FieldInfo(**attributes)) + self._parse_data_structures(getattr(value, field.name), new_path) + elif isinstance(value, BaseModel): + self._element_names[path] = value.__class__.__name__ + for model_fields in (value.__class__.model_fields, value.__class__.model_computed_fields): + for field, info in model_fields.items(): + new_path = f'{path}.{field}' if path else field + if self.include_field_info and (isinstance(info, ComputedFieldInfo) or not info.exclude): + field_repr = f'{value.__class__.__name__}.{field}' + self._fields_info[new_path] = (field_repr, info) + self._parse_data_structures(getattr(value, field), new_path) + elif isinstance(value, Iterable): + for n, item in enumerate(value): # pyright: ignore[reportUnknownVariableType,reportUnknownArgumentType] + new_path = f'{path}.[{n}]' if path else f'[{n}]' + self._parse_data_structures(item, new_path) + + @classmethod + def _extract_attributes(cls, info: FieldInfo | ComputedFieldInfo) -> dict[str, str]: + return {attr: str(value) for attr in cls._FIELD_ATTRIBUTES if (value := getattr(info, attr, None)) is not None} def _rootless_xml_elements(root: ElementTree.Element, indent: str | None) -> Iterator[str]: diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 349420fe02..2acc726e8c 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -1161,11 +1161,7 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me if settings.include_content and part.content is not None: # pragma: no branch from .models.instrumented import InstrumentedModel - return_part['result'] = ( - part.content - if isinstance(part.content, str) - else {k: InstrumentedModel.serialize_any(v) for k, v in part.content.items()} - ) + return_part['result'] = InstrumentedModel.serialize_any(part.content) parts.append(return_part) return parts diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 5dcf21f61f..18ddecca02 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -51,6 +51,7 @@ try: from google.genai import Client from google.genai.types import ( + BlobDict, CodeExecutionResult, CodeExecutionResultDict, ContentDict, @@ -58,6 +59,7 @@ CountTokensConfigDict, ExecutableCode, ExecutableCodeDict, + FileDataDict, FinishReason as GoogleFinishReason, FunctionCallDict, FunctionCallingConfigDict, @@ -79,6 +81,7 @@ ToolDict, ToolListUnionDict, UrlContextDict, + VideoMetadataDict, ) from ..providers.google import GoogleProvider @@ -525,17 +528,17 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: if isinstance(item, str): content.append({'text': item}) elif isinstance(item, BinaryContent): - # NOTE: The type from Google GenAI is incorrect, it should be `str`, not `bytes`. - base64_encoded = base64.b64encode(item.data).decode('utf-8') - inline_data_dict = {'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}} + inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type} + part_dict: PartDict = {'inline_data': inline_data_dict} if item.vendor_metadata: - inline_data_dict['video_metadata'] = item.vendor_metadata - content.append(inline_data_dict) # type: ignore + part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata) + content.append(part_dict) elif isinstance(item, VideoUrl) and item.is_youtube: - file_data_dict = {'file_data': {'file_uri': item.url, 'mime_type': item.media_type}} + file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type} + part_dict: PartDict = {'file_data': file_data_dict} if item.vendor_metadata: # pragma: no branch - file_data_dict['video_metadata'] = item.vendor_metadata - content.append(file_data_dict) # type: ignore + part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata) + content.append(part_dict) elif isinstance(item, FileUrl): if item.force_download or ( # google-gla does not support passing file urls directly, except for youtube videos @@ -543,13 +546,15 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: self.system == 'google-gla' and not item.url.startswith(r'https://generativelanguage.googleapis.com/v1beta/files') ): - downloaded_item = await download_item(item, data_format='base64') - inline_data = {'data': downloaded_item['data'], 'mime_type': downloaded_item['data_type']} - content.append({'inline_data': inline_data}) # type: ignore + downloaded_item = await download_item(item, data_format='bytes') + inline_data: BlobDict = { + 'data': downloaded_item['data'], + 'mime_type': downloaded_item['data_type'], + } + content.append({'inline_data': inline_data}) else: - content.append( - {'file_data': {'file_uri': item.url, 'mime_type': item.media_type}} - ) # pragma: lax no cover + file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type} + content.append({'file_data': file_data_dict}) # pragma: lax no cover else: assert_never(item) return content @@ -578,7 +583,9 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]: async for chunk in self._response: self._usage = _metadata_as_usage(chunk) - assert chunk.candidates is not None + if not chunk.candidates: + continue # pragma: no cover + candidate = chunk.candidates[0] if chunk.response_id: # pragma: no branch @@ -610,7 +617,10 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]: else: # pragma: no cover raise UnexpectedModelBehavior('Content field missing from streaming Gemini response', str(chunk)) - parts = candidate.content.parts or [] + parts = candidate.content.parts + if not parts: + continue # pragma: no cover + for part in parts: if part.thought_signature: signature = base64.b64encode(part.thought_signature).decode('utf-8') @@ -822,7 +832,7 @@ def _metadata_as_usage(response: GenerateContentResponse) -> usage.RequestUsage: if not metadata_details: continue for detail in metadata_details: - if not detail.modality or not detail.token_count: # pragma: no cover + if not detail.modality or not detail.token_count: continue details[f'{detail.modality.lower()}_{prefix}_tokens'] = detail.token_count if detail.modality != 'AUDIO': diff --git a/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml b/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml index 57f1c3894b..fb8cddd6cc 100644 --- a/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_document_url_input.yaml @@ -251,12 +251,10 @@ interactions: headers: accept-ranges: - bytes - age: - - '264068' alt-svc: - h3=":443"; ma=86400 cache-control: - - public, max-age=604800, s-maxage=604800 + - max-age=21600 connection: - keep-alive content-length: @@ -268,7 +266,7 @@ interactions: etag: - '"33d0-438b181451e00"' expires: - - Tue, 24 Jun 2025 13:27:15 GMT + - Fri, 19 Sep 2025 22:42:26 GMT last-modified: - Mon, 27 Aug 2007 17:15:36 GMT strict-transport-security: @@ -312,11 +310,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '776' + - '772' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=1228 + - gfet4t7; dur=1306 transfer-encoding: - chunked vary: @@ -325,27 +323,27 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.28572704394658405 + - avgLogprobs: -0.2349079738963734 content: parts: - text: | - The document appears to be a "Dummy PDF file". + The document appears to be a dummy PDF file. role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 4FpeaJWYOLq3nvgP0vasuQk + responseId: 8ofNaO_lJsO1qtsP0OzFsQQ usageMetadata: - candidatesTokenCount: 12 + candidatesTokenCount: 11 candidatesTokensDetails: - modality: TEXT - tokenCount: 12 + tokenCount: 11 promptTokenCount: 1305 promptTokensDetails: - - modality: TEXT - tokenCount: 15 - modality: DOCUMENT tokenCount: 1290 - totalTokenCount: 1317 + - modality: TEXT + tokenCount: 15 + totalTokenCount: 1316 status: code: 200 message: OK diff --git a/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml b/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml index 9754e203bb..fbdabfcc80 100644 --- a/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_image_as_binary_content_input.yaml @@ -33,11 +33,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '756' + - '754' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=2659 + - gfet4t7; dur=1900 transfer-encoding: - chunked vary: @@ -46,14 +46,14 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.005608726706769731 + - avgLogprobs: -0.00857612325085534 content: parts: - text: The fruit in the image is a kiwi. role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 2VpeaPm3DaHp1PIPwK-EmAM + responseId: 4YfNaLXqOsKVmtkPqqehuAQ usageMetadata: candidatesTokenCount: 9 candidatesTokensDetails: @@ -61,10 +61,10 @@ interactions: tokenCount: 9 promptTokenCount: 3367 promptTokensDetails: - - modality: TEXT - tokenCount: 13 - modality: IMAGE tokenCount: 3354 + - modality: TEXT + tokenCount: 13 totalTokenCount: 3376 status: code: 200 diff --git a/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml b/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml index f2d3dff22a..a20069f23e 100644 --- a/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_image_url_input.yaml @@ -580,7 +580,7 @@ interactions: access-control-allow-origin: - '*' age: - - '1386476' + - '1500997' cache-control: - public, max-age=31536000 connection: @@ -632,11 +632,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '740' + - '738' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=1424 + - gfet4t7; dur=867 transfer-encoding: - chunked vary: @@ -645,27 +645,26 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.1821905771891276 + - avgLogprobs: -0.18855420351028443 content: parts: - - text: | - That is a potato. + - text: That is a potato. role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 3VpeaPexBLq3nvgP0vasuQk + responseId: 64fNaIPHJNulqtsPx8OZsQQ usageMetadata: - candidatesTokenCount: 6 + candidatesTokenCount: 5 candidatesTokensDetails: - modality: TEXT - tokenCount: 6 + tokenCount: 5 promptTokenCount: 1817 promptTokensDetails: - modality: TEXT tokenCount: 11 - modality: IMAGE tokenCount: 1806 - totalTokenCount: 1823 + totalTokenCount: 1822 status: code: 200 message: OK diff --git a/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml b/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml index 96d18c9b04..18a5aa1e02 100644 --- a/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_text_as_binary_content_input.yaml @@ -33,11 +33,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '712' + - '710' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=459 + - gfet4t7; dur=571 transfer-encoding: - chunked vary: @@ -46,7 +46,7 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.014047189553578695 + - avgLogprobs: -0.2041482448577881 content: parts: - text: | @@ -54,7 +54,7 @@ interactions: role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 41peaPz5EtOvnvgPgYfPiQY + responseId: 9ofNaNqNKNWDmtkPs-nsqAU usageMetadata: candidatesTokenCount: 15 candidatesTokensDetails: diff --git a/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml b/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml index 21a9527dd2..1ebab37ebe 100644 --- a/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_text_document_url_input.yaml @@ -49,9 +49,11 @@ interactions: etag: - W/"61efea10-a0e" expires: - - Fri, 04 Jul 2025 08:48:34 GMT + - Fri, 26 Sep 2025 16:42:28 GMT last-modified: - Tue, 25 Jan 2022 12:16:16 GMT + strict-transport-security: + - max-age=15552000; includeSubDomains transfer-encoding: - chunked vary: @@ -93,11 +95,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '1189' + - '985' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=802 + - gfet4t7; dur=888 transfer-encoding: - chunked vary: @@ -106,25 +108,25 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.6026712397939151 + - avgLogprobs: -0.5004191543116714 content: parts: - text: | - The main content of the document is an example of a TXT file, specifically providing information about the placeholder names "John Doe" (and related variations) used for unidentified or anonymous individuals, particularly in legal contexts in the United States and Canada. It also explains alternative names used in other countries and some additional context and examples of when "John Doe" might be used. The document also includes attribution to Wikipedia for the example content and a link to the license under which it is shared. + The main content of the TXT file is an explanation of the placeholder name "John Doe" (and related variations) and its usage in legal contexts, popular culture, and other situations where the identity of a person is unknown or needs to be withheld. The document also includes the purpose of the file and other file type information. role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 4lpeaLX9EYzj1PIP0MPrsAg + responseId: 9YfNaLGGDuOmqtsPoLXu4AQ usageMetadata: - candidatesTokenCount: 97 + candidatesTokenCount: 66 candidatesTokensDetails: - modality: TEXT - tokenCount: 97 + tokenCount: 66 promptTokenCount: 614 promptTokensDetails: - modality: TEXT tokenCount: 614 - totalTokenCount: 711 + totalTokenCount: 680 status: code: 200 message: OK diff --git a/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml b/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml index 240e3395c0..50d736a38c 100644 --- a/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_video_as_binary_content_input.yaml @@ -33,11 +33,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '1384' + - '1525' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=1776 + - gfet4t7; dur=2522 transfer-encoding: - chunked vary: @@ -46,31 +46,33 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.7381779328557371 + - avgLogprobs: -0.7307238166714892 content: parts: - - text: |- - Okay, I can describe what is visible in the image. - - The image shows a camera setup in an outdoor setting. The camera is mounted on a tripod and has an external monitor attached to it. The monitor is displaying a scene that appears to be a desert landscape with rocky formations and mountains in the background. The foreground and background of the overall image, outside of the camera monitor, is also a blurry, desert landscape. The colors in the background are warm and suggest either sunrise, sunset, or reflected light off the rock formations. - - It looks like someone is either reviewing footage on the monitor, or using it as an aid for framing the shot. + - text: "Okay! It looks like the image shows a camera monitor, likely used for professional or semi-professional + video recording. \n\nHere's what I can gather from the image:\n\n* **Camera Monitor:** The central element + is a small screen attached to a camera rig (tripod and probably camera body). These monitors are used to provide + a larger, clearer view of what the camera is recording, aiding in focus, composition, and exposure adjustments.\n* + \ **Scene on Monitor:** The screen shows an image of what appears to be a rocky mountain path or canyon with + a snow capped mountain in the distance.\n* **Background:** The background is blurred, likely the same scene + as on the camera monitor.\n\nLet me know if you want me to focus on any specific aspect or detail!" role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 21peaP3fE4fJ1PIPhdaDmA0 + responseId: 5YfNaKulJaGtmtkPz5eQwAQ usageMetadata: - candidatesTokenCount: 131 + candidatesTokenCount: 162 candidatesTokensDetails: - modality: TEXT - tokenCount: 131 - promptTokenCount: 270 + tokenCount: 162 + promptTokenCount: 268 promptTokensDetails: - - modality: VIDEO - tokenCount: 260 + - modality: AUDIO - modality: TEXT tokenCount: 10 - totalTokenCount: 401 + - modality: VIDEO + tokenCount: 258 + totalTokenCount: 430 status: code: 200 message: OK diff --git a/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml b/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml index 137a0a7db8..d8dec7a285 100644 --- a/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml +++ b/tests/models/cassettes/test_google/test_google_model_video_url_input.yaml @@ -27,23 +27,24 @@ interactions: gist.github.com/assets-cdn/worker/; connect-src ''self'' uploads.github.com www.githubstatus.com collector.github.com raw.githubusercontent.com api.github.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com - *.rel.tunnels.api.visualstudio.com wss://*.rel.tunnels.api.visualstudio.com objects-origin.githubusercontent.com copilot-proxy.githubusercontent.com - proxy.individual.githubcopilot.com proxy.business.githubcopilot.com proxy.enterprise.githubcopilot.com *.actions.githubusercontent.com - wss://*.actions.githubusercontent.com productionresultssa0.blob.core.windows.net/ productionresultssa1.blob.core.windows.net/ - productionresultssa2.blob.core.windows.net/ productionresultssa3.blob.core.windows.net/ productionresultssa4.blob.core.windows.net/ - productionresultssa5.blob.core.windows.net/ productionresultssa6.blob.core.windows.net/ productionresultssa7.blob.core.windows.net/ - productionresultssa8.blob.core.windows.net/ productionresultssa9.blob.core.windows.net/ productionresultssa10.blob.core.windows.net/ - productionresultssa11.blob.core.windows.net/ productionresultssa12.blob.core.windows.net/ productionresultssa13.blob.core.windows.net/ - productionresultssa14.blob.core.windows.net/ productionresultssa15.blob.core.windows.net/ productionresultssa16.blob.core.windows.net/ - productionresultssa17.blob.core.windows.net/ productionresultssa18.blob.core.windows.net/ productionresultssa19.blob.core.windows.net/ - github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com - insights.github.com wss://alive.github.com api.githubcopilot.com api.individual.githubcopilot.com api.business.githubcopilot.com - api.enterprise.githubcopilot.com; font-src github.githubassets.com; form-action ''self'' github.com gist.github.com - copilot-workspace.githubnext.com objects-origin.githubusercontent.com; frame-ancestors ''none''; frame-src viewscreen.githubusercontent.com - notebooks.githubusercontent.com; img-src ''self'' data: blob: github.githubassets.com media.githubusercontent.com - camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com private-avatars.githubusercontent.com - github-cloud.s3.amazonaws.com objects.githubusercontent.com release-assets.githubusercontent.com secured-user-images.githubusercontent.com/ - user-images.githubusercontent.com/ private-user-images.githubusercontent.com opengraph.githubassets.com copilotprodattachments.blob.core.windows.net/github-production-copilot-attachments/ + *.rel.tunnels.api.visualstudio.com wss://*.rel.tunnels.api.visualstudio.com github.githubassets.com objects-origin.githubusercontent.com + copilot-proxy.githubusercontent.com proxy.individual.githubcopilot.com proxy.business.githubcopilot.com proxy.enterprise.githubcopilot.com + *.actions.githubusercontent.com wss://*.actions.githubusercontent.com productionresultssa0.blob.core.windows.net/ + productionresultssa1.blob.core.windows.net/ productionresultssa2.blob.core.windows.net/ productionresultssa3.blob.core.windows.net/ + productionresultssa4.blob.core.windows.net/ productionresultssa5.blob.core.windows.net/ productionresultssa6.blob.core.windows.net/ + productionresultssa7.blob.core.windows.net/ productionresultssa8.blob.core.windows.net/ productionresultssa9.blob.core.windows.net/ + productionresultssa10.blob.core.windows.net/ productionresultssa11.blob.core.windows.net/ productionresultssa12.blob.core.windows.net/ + productionresultssa13.blob.core.windows.net/ productionresultssa14.blob.core.windows.net/ productionresultssa15.blob.core.windows.net/ + productionresultssa16.blob.core.windows.net/ productionresultssa17.blob.core.windows.net/ productionresultssa18.blob.core.windows.net/ + productionresultssa19.blob.core.windows.net/ github-production-repository-image-32fea6.s3.amazonaws.com github-production-release-asset-2e65be.s3.amazonaws.com + insights.github.com wss://alive.github.com wss://alive-staging.github.com api.githubcopilot.com api.individual.githubcopilot.com + api.business.githubcopilot.com api.enterprise.githubcopilot.com; font-src github.githubassets.com; form-action ''self'' + github.com gist.github.com copilot-workspace.githubnext.com objects-origin.githubusercontent.com; frame-ancestors + ''none''; frame-src viewscreen.githubusercontent.com notebooks.githubusercontent.com; img-src ''self'' data: blob: + github.githubassets.com media.githubusercontent.com camo.githubusercontent.com identicons.github.com avatars.githubusercontent.com + private-avatars.githubusercontent.com github-cloud.s3.amazonaws.com objects.githubusercontent.com release-assets.githubusercontent.com + secured-user-images.githubusercontent.com/ user-images.githubusercontent.com/ private-user-images.githubusercontent.com + opengraph.githubassets.com marketplace-screenshots.githubusercontent.com/ copilotprodattachments.blob.core.windows.net/github-production-copilot-attachments/ github-production-user-asset-6210df.s3.amazonaws.com customer-stories-feed.github.com spotlights-feed.github.com objects-origin.githubusercontent.com *.githubusercontent.com; manifest-src ''self''; media-src github.com user-images.githubusercontent.com/ secured-user-images.githubusercontent.com/ private-user-images.githubusercontent.com github-production-user-asset-6210df.s3.amazonaws.com gist.github.com; script-src @@ -11091,7 +11092,7 @@ interactions: etag: - W/"0e8fc2491b5d905053d769cebf41efa65670fdce574bc3d0a132431e025608d7" expires: - - Fri, 27 Jun 2025 08:53:30 GMT + - Fri, 19 Sep 2025 16:47:21 GMT source-age: - '0' strict-transport-security: @@ -11135,11 +11136,11 @@ interactions: alt-svc: - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: - - '1068' + - '1381' content-type: - application/json; charset=UTF-8 server-timing: - - gfet4t7; dur=1439 + - gfet4t7; dur=2356 transfer-encoding: - chunked vary: @@ -11148,33 +11149,32 @@ interactions: - Referer parsed_body: candidates: - - avgLogprobs: -0.7620539724090953 + - avgLogprobs: -0.8202069508943627 content: parts: - - text: |- - Okay, based on the image, here's what I can infer: - - * **A camera monitor is mounted on top of a camera.** - * **The monitor's screen is on, displaying a view of the rocky mountains.** - * **This setting suggests a professional video shoot.** - - If you'd like a more detailed explanation, please provide additional information about the video. + - text: "Certainly! Based on the image you sent, it appears to be a setup for filming or photography. \n\nHere's + what I can observe:\n\n* **Camera Monitor:** There is a monitor mounted on a tripod, displaying a shot of + a canyon or mountain landscape.\n* **Camera/Recording Device:** Below the monitor, there is a camera or some + other kind of recording device.\n* **Landscape Backdrop:** In the background, there is a similar-looking landscape + to what's being displayed on the screen.\n\nIn summary, it looks like the image shows a camera setup, perhaps + in the process of filming, with a monitor to review the footage." role: model finishReason: STOP modelVersion: gemini-2.0-flash - responseId: 31peaJ-RIOrIx_APoM6e2A4 + responseId: 7ofNaJWHAbCtmtkPkoSwwQU usageMetadata: - candidatesTokenCount: 81 + candidatesTokenCount: 139 candidatesTokensDetails: - modality: TEXT - tokenCount: 81 - promptTokenCount: 270 + tokenCount: 139 + promptTokenCount: 268 promptTokensDetails: + - modality: VIDEO + tokenCount: 258 - modality: TEXT tokenCount: 10 - - modality: VIDEO - tokenCount: 260 - totalTokenCount: 351 + - modality: AUDIO + totalTokenCount: 407 status: code: 200 message: OK diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 989ef80d85..b3afeafa3f 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -662,11 +662,15 @@ async def test_google_model_video_as_binary_content_input( result = await agent.run(['Explain me this video', video_content]) assert result.output == snapshot("""\ -Okay, I can describe what is visible in the image. +Okay! It looks like the image shows a camera monitor, likely used for professional or semi-professional video recording. \n\ -The image shows a camera setup in an outdoor setting. The camera is mounted on a tripod and has an external monitor attached to it. The monitor is displaying a scene that appears to be a desert landscape with rocky formations and mountains in the background. The foreground and background of the overall image, outside of the camera monitor, is also a blurry, desert landscape. The colors in the background are warm and suggest either sunrise, sunset, or reflected light off the rock formations. +Here's what I can gather from the image: -It looks like someone is either reviewing footage on the monitor, or using it as an aid for framing the shot.\ +* **Camera Monitor:** The central element is a small screen attached to a camera rig (tripod and probably camera body). These monitors are used to provide a larger, clearer view of what the camera is recording, aiding in focus, composition, and exposure adjustments. +* **Scene on Monitor:** The screen shows an image of what appears to be a rocky mountain path or canyon with a snow capped mountain in the distance. +* **Background:** The background is blurred, likely the same scene as on the camera monitor. + +Let me know if you want me to focus on any specific aspect or detail!\ """) @@ -697,7 +701,7 @@ async def test_google_model_image_url_input(allow_model_requests: None, google_p ImageUrl(url='https://t3.ftcdn.net/jpg/00/85/79/92/360_F_85799278_0BBGV9OAdQDTLnKwAPBCcg1J7QtiieJY.jpg'), ] ) - assert result.output == snapshot('That is a potato.\n') + assert result.output == snapshot('That is a potato.') async def test_google_model_video_url_input(allow_model_requests: None, google_provider: GoogleProvider): @@ -711,13 +715,15 @@ async def test_google_model_video_url_input(allow_model_requests: None, google_p ] ) assert result.output == snapshot("""\ -Okay, based on the image, here's what I can infer: +Certainly! Based on the image you sent, it appears to be a setup for filming or photography. \n\ -* **A camera monitor is mounted on top of a camera.** -* **The monitor's screen is on, displaying a view of the rocky mountains.** -* **This setting suggests a professional video shoot.** +Here's what I can observe: -If you'd like a more detailed explanation, please provide additional information about the video.\ +* **Camera Monitor:** There is a monitor mounted on a tripod, displaying a shot of a canyon or mountain landscape. +* **Camera/Recording Device:** Below the monitor, there is a camera or some other kind of recording device. +* **Landscape Backdrop:** In the background, there is a similar-looking landscape to what's being displayed on the screen. + +In summary, it looks like the image shows a camera setup, perhaps in the process of filming, with a monitor to review the footage.\ """) @@ -754,7 +760,7 @@ async def test_google_model_document_url_input(allow_model_requests: None, googl document_url = DocumentUrl(url='https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf') result = await agent.run(['What is the main content on this document?', document_url]) - assert result.output == snapshot('The document appears to be a "Dummy PDF file".\n') + assert result.output == snapshot('The document appears to be a dummy PDF file.\n') async def test_google_model_text_document_url_input(allow_model_requests: None, google_provider: GoogleProvider): @@ -765,7 +771,7 @@ async def test_google_model_text_document_url_input(allow_model_requests: None, result = await agent.run(['What is the main content on this document?', text_document_url]) assert result.output == snapshot( - 'The main content of the document is an example of a TXT file, specifically providing information about the placeholder names "John Doe" (and related variations) used for unidentified or anonymous individuals, particularly in legal contexts in the United States and Canada. It also explains alternative names used in other countries and some additional context and examples of when "John Doe" might be used. The document also includes attribution to Wikipedia for the example content and a link to the license under which it is shared.\n' + 'The main content of the TXT file is an explanation of the placeholder name "John Doe" (and related variations) and its usage in legal contexts, popular culture, and other situations where the identity of a person is unknown or needs to be withheld. The document also includes the purpose of the file and other file type information.\n' ) diff --git a/tests/models/test_groq.py b/tests/models/test_groq.py index dc4e554e4c..cb01f8cd50 100644 --- a/tests/models/test_groq.py +++ b/tests/models/test_groq.py @@ -15,7 +15,7 @@ from pydantic import BaseModel from typing_extensions import TypedDict -from pydantic_ai import Agent, ModelHTTPError, ModelRetry, UnexpectedModelBehavior +from pydantic_ai import Agent, ModelHTTPError, ModelRetry from pydantic_ai.builtin_tools import WebSearchTool from pydantic_ai.messages import ( BinaryContent, @@ -533,17 +533,6 @@ async def test_stream_structured_finish_reason(allow_model_requests: None): assert result.is_complete -async def test_no_content(allow_model_requests: None): - stream = chunk([ChoiceDelta()]), chunk([ChoiceDelta()]) - mock_client = MockGroq.create_mock_stream(stream) - m = GroqModel('llama-3.3-70b-versatile', provider=GroqProvider(groq_client=mock_client)) - agent = Agent(m, output_type=MyTypedDict) - - with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'): - async with agent.run_stream(''): - pass - - async def test_no_delta(allow_model_requests: None): stream = chunk([]), text_chunk('hello '), text_chunk('world') mock_client = MockGroq.create_mock_stream(stream) diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py index db7d40f5fc..f88be32156 100644 --- a/tests/models/test_huggingface.py +++ b/tests/models/test_huggingface.py @@ -13,7 +13,7 @@ from inline_snapshot import snapshot from typing_extensions import TypedDict -from pydantic_ai import Agent, ModelRetry, UnexpectedModelBehavior +from pydantic_ai import Agent, ModelRetry from pydantic_ai.exceptions import ModelHTTPError from pydantic_ai.messages import ( AudioUrl, @@ -601,20 +601,6 @@ async def test_stream_structured_finish_reason(allow_model_requests: None): assert result.is_complete -async def test_no_content(allow_model_requests: None): - stream = [ - chunk([ChatCompletionStreamOutputDelta(role='assistant')]), # type: ignore - chunk([ChatCompletionStreamOutputDelta(role='assistant')]), # type: ignore - ] - mock_client = MockHuggingFace.create_stream_mock(stream) - m = HuggingFaceModel('hf-model', provider=HuggingFaceProvider(hf_client=mock_client, api_key='x')) - agent = Agent(m, output_type=MyTypedDict) - - with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'): - async with agent.run_stream(''): - pass - - async def test_no_delta(allow_model_requests: None): stream = [ chunk([]), diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index f759fa1d56..f48dc87404 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -1359,6 +1359,22 @@ def test_message_with_builtin_tool_calls(): BuiltinToolCallPart('code_execution', {'code': '2 * 2'}, tool_call_id='tool_call_1'), BuiltinToolReturnPart('code_execution', {'output': '4'}, tool_call_id='tool_call_1'), TextPart('text2'), + BuiltinToolCallPart( + 'web_search', + '{"query": "weather: San Francisco, CA", "type": "search"}', + tool_call_id='tool_call_2', + ), + BuiltinToolReturnPart( + 'web_search', + [ + { + 'url': 'https://www.weather.com/weather/today/l/USCA0987:1:US', + 'title': 'Weather in San Francisco', + } + ], + tool_call_id='tool_call_2', + ), + TextPart('text3'), ] ), ] @@ -1387,6 +1403,26 @@ def test_message_with_builtin_tool_calls(): 'result': {'output': '4'}, }, {'type': 'text', 'content': 'text2'}, + { + 'type': 'tool_call', + 'id': 'tool_call_2', + 'name': 'web_search', + 'builtin': True, + 'arguments': '{"query": "weather: San Francisco, CA", "type": "search"}', + }, + { + 'type': 'tool_call_response', + 'id': 'tool_call_2', + 'name': 'web_search', + 'builtin': True, + 'result': [ + { + 'url': 'https://www.weather.com/weather/today/l/USCA0987:1:US', + 'title': 'Weather in San Francisco', + } + ], + }, + {'type': 'text', 'content': 'text3'}, ], } ] diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index c0d3a92b5b..97bd0d6a16 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -606,17 +606,6 @@ async def test_stream_text_empty_think_tag_and_text_before_tool_call(allow_model assert await result.get_output() == snapshot({'first': 'One', 'second': 'Two'}) -async def test_no_content(allow_model_requests: None): - stream = [chunk([ChoiceDelta()]), chunk([ChoiceDelta()])] - mock_client = MockOpenAI.create_mock_stream(stream) - m = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(openai_client=mock_client)) - agent = Agent(m, output_type=MyTypedDict) - - with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'): - async with agent.run_stream(''): - pass - - async def test_no_delta(allow_model_requests: None): stream = [ chunk([]), diff --git a/tests/test_agent.py b/tests/test_agent.py index 8c90db56b5..f8f622b3de 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -2168,14 +2168,79 @@ def simple_response(_messages: list[ModelMessage], _info: AgentInfo) -> ModelRes assert result.new_messages() == [] -def test_empty_tool_calls(): - def empty(_: list[ModelMessage], _info: AgentInfo) -> ModelResponse: +def test_empty_response(): + def llm(messages: list[ModelMessage], _info: AgentInfo) -> ModelResponse: + if len(messages) == 1: + return ModelResponse(parts=[]) + else: + return ModelResponse(parts=[TextPart('ok here is text')]) + + agent = Agent(FunctionModel(llm)) + + result = agent.run_sync('Hello') + + assert result.all_messages() == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='Hello', + timestamp=IsDatetime(), + ) + ] + ), + ModelResponse( + parts=[], + usage=RequestUsage(input_tokens=51), + model_name='function:llm:', + timestamp=IsDatetime(), + ), + ModelRequest(parts=[]), + ModelResponse( + parts=[TextPart(content='ok here is text')], + usage=RequestUsage(input_tokens=51, output_tokens=4), + model_name='function:llm:', + timestamp=IsDatetime(), + ), + ] + ) + + +def test_empty_response_without_recovery(): + def llm(messages: list[ModelMessage], _info: AgentInfo) -> ModelResponse: return ModelResponse(parts=[]) - agent = Agent(FunctionModel(empty)) + agent = Agent(FunctionModel(llm), output_type=tuple[str, int]) - with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'): - agent.run_sync('Hello') + with capture_run_messages() as messages: + with pytest.raises(UnexpectedModelBehavior, match=r'Exceeded maximum retries \(1\) for output validation'): + agent.run_sync('Hello') + + assert messages == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='Hello', + timestamp=IsDatetime(), + ) + ] + ), + ModelResponse( + parts=[], + usage=RequestUsage(input_tokens=51), + model_name='function:llm:', + timestamp=IsDatetime(), + ), + ModelRequest(parts=[]), + ModelResponse( + parts=[], + usage=RequestUsage(input_tokens=51), + model_name='function:llm:', + timestamp=IsDatetime(), + ), + ] + ) def test_unknown_tool(): diff --git a/tests/test_format_as_xml.py b/tests/test_format_as_xml.py index 37053a67f9..001447dd9a 100644 --- a/tests/test_format_as_xml.py +++ b/tests/test_format_as_xml.py @@ -1,10 +1,14 @@ -from dataclasses import dataclass +from __future__ import annotations as _annotations + +from dataclasses import dataclass, field from datetime import date, datetime from typing import Any import pytest from inline_snapshot import snapshot -from pydantic import BaseModel +from pydantic import BaseModel, Field, computed_field +from pydantic.dataclasses import dataclass as pydantic_dataclass +from typing_extensions import Self from pydantic_ai import format_as_xml @@ -20,6 +24,19 @@ class ExamplePydanticModel(BaseModel): age: int +class ExamplePydanticFields(BaseModel): + name: str = Field(description="The person's name") + age: int = Field(description='Years', title='Age', default=18) + height: float = Field(description="The person's height", exclude=True) + children: list[Self] | None = Field(title='child', alias='child_list', default=None) + + @computed_field(title='Location') + def location(self) -> str | None: + if self.name == 'John': + return 'Australia' + return None + + @pytest.mark.parametrize( 'input_obj,output', [ @@ -124,7 +141,373 @@ class ExamplePydanticModel(BaseModel): ], ) def test_root_tag(input_obj: Any, output: str): - assert format_as_xml(input_obj, root_tag='examples', item_tag='example') == output + assert format_as_xml(input_obj, root_tag='examples', item_tag='example', include_field_info=False) == output + assert format_as_xml(input_obj, root_tag='examples', item_tag='example', include_field_info='once') == output + + +@pytest.mark.parametrize( + 'input_obj,use_fields,output', + [ + pytest.param( + ExamplePydanticFields( + name='John', + age=42, + height=160.0, + child_list=[ + ExamplePydanticFields(name='Liam', height=150), + ExamplePydanticFields(name='Alice', height=160), + ], + ), + 'once', + snapshot("""\ +John +42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + +Australia\ +"""), + id='pydantic model with fields', + ), + pytest.param( + [ + ExamplePydanticFields( + name='John', + age=42, + height=160.0, + child_list=[ + ExamplePydanticFields(name='Liam', height=150), + ExamplePydanticFields(name='Alice', height=160), + ], + ) + ], + 'once', + snapshot("""\ + + John + 42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + + Australia +\ +"""), + id='list[pydantic model with fields]', + ), + pytest.param( + ExamplePydanticFields( + name='John', + age=42, + height=160.0, + child_list=[ + ExamplePydanticFields(name='Liam', height=150), + ExamplePydanticFields(name='Alice', height=160), + ], + ), + False, + snapshot("""\ +John +42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + +Australia\ +"""), + id='pydantic model without fields', + ), + ], +) +def test_fields(input_obj: Any, use_fields: bool, output: str): + assert format_as_xml(input_obj, include_field_info=use_fields) == output + + +def test_repeated_field_attributes(): + class DataItem(BaseModel): + user1: ExamplePydanticFields + user2: ExamplePydanticFields + + data = ExamplePydanticFields( + name='John', + age=42, + height=160.0, + child_list=[ + ExamplePydanticFields(name='Liam', height=150), + ExamplePydanticFields(name='Alice', height=160), + ], + ) + assert ( + format_as_xml(data, include_field_info=True) + == """\ +John +42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + +Australia\ +""" + ) + + assert ( + format_as_xml(DataItem(user1=data, user2=data.model_copy()), include_field_info=True) + == """\ + + John + 42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + + Australia + + + John + 42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + + Australia +\ +""" + ) + + assert ( + format_as_xml(DataItem(user1=data, user2=data.model_copy()), include_field_info='once') + == """\ + + John + 42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + + Australia + + + John + 42 + + + Liam + 18 + null + null + + + Alice + 18 + null + null + + + Australia +\ +""" + ) + + +def test_nested_data(): + @dataclass + class DataItem1: + id: str | None = None + source: str = field(default='none', metadata={'description': 'the source', 'date': '19990805'}) + + class ModelItem1(BaseModel): + name: str = Field(description='Name') + value: int + items: list[DataItem1] = Field(description='Items') + + @pydantic_dataclass + class DataItem2: + model: ModelItem1 = field(metadata={'title': 'the model', 'description': 'info'}) + others: tuple[ModelItem1] | None = None + count: int = field(default=10, metadata={'info': 'a count'}) + + data = { + 'values': [ + DataItem2( + ModelItem1(name='Alice', value=42, items=[DataItem1('xyz')]), + (ModelItem1(name='Liam', value=3, items=[]),), + ), + DataItem2( + ModelItem1( + name='Bob', + value=7, + items=[ + DataItem1('a'), + DataItem1(source='xx'), + ], + ), + count=42, + ), + ] + } + + assert ( + format_as_xml(data, include_field_info='once') + == """ + + + + Alice + 42 + + + xyz + none + + + + + + Liam + 3 + + + + 10 + + + + Bob + 7 + + + a + none + + + null + xx + + + + null + 42 + + +""".strip() + ) + + assert ( + format_as_xml(data, include_field_info=False) + == """ + + + + Alice + 42 + + + xyz + none + + + + + + Liam + 3 + + + + 10 + + + + Bob + 7 + + + a + none + + + null + xx + + + + null + 42 + + +""".strip() + ) @pytest.mark.parametrize( @@ -194,6 +577,15 @@ def test_invalid_key(): format_as_xml({(1, 2): 42}) +def test_parse_invalid_value(): + class Invalid(BaseModel): + name: str = Field(default='Alice', title='Name') + bad: Any = object() + + with pytest.raises(TypeError, match='Unsupported type'): + format_as_xml(Invalid(), include_field_info='once') + + def test_set(): assert '1' in format_as_xml({1, 2, 3}, item_tag='example') diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 6e49804d58..7a73a5b27e 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -400,15 +400,47 @@ async def ret_a(x: str) -> str: ) -async def test_call_tool_empty(): - async def stream_structured_function(_messages: list[ModelMessage], _: AgentInfo) -> AsyncIterator[DeltaToolCalls]: - yield {} +async def test_empty_response(): + async def stream_structured_function( + messages: list[ModelMessage], _: AgentInfo + ) -> AsyncIterator[DeltaToolCalls | str]: + if len(messages) == 1: + yield {} + else: + yield 'ok here is text' - agent = Agent(FunctionModel(stream_function=stream_structured_function), output_type=tuple[str, int]) + agent = Agent(FunctionModel(stream_function=stream_structured_function)) - with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'): - async with agent.run_stream('hello'): - pass + async with agent.run_stream('hello') as result: + response = await result.get_output() + assert response == snapshot('ok here is text') + messages = result.all_messages() + + assert messages == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='hello', + timestamp=IsDatetime(), + ) + ] + ), + ModelResponse( + parts=[], + usage=RequestUsage(input_tokens=50), + model_name='function::stream_structured_function', + timestamp=IsDatetime(), + ), + ModelRequest(parts=[]), + ModelResponse( + parts=[TextPart(content='ok here is text')], + usage=RequestUsage(input_tokens=50, output_tokens=4), + model_name='function::stream_structured_function', + timestamp=IsDatetime(), + ), + ] + ) async def test_call_tool_wrong_name():