Skip to content

Commit 41f7b59

Browse files
weiguangli-io, claude, moonbox3
authored
Python: fix Google AI/Vertex AI crash on anyOf schema (#12442) (#13624)
### Motivation and Context

Closes #12442

When `ChatCompletionAgent` instances are used as plugins with `GoogleAIChatCompletion`, the application crashes with:

```
ValueError: Unknown field for Schema: anyOf
```

**Root cause**: The Semantic Kernel JSON schema builder generates `anyOf` for Union types (e.g. `str | list[str]` on the agent's `messages` parameter) and type-as-array for Optional types (e.g. `["string", "null"]`). Google AI's protobuf `Schema` does not support these constructs.

### Description

Add `sanitize_schema_for_google_ai()` to `shared_utils.py` that recursively rewrites unsupported schema constructs:

- `anyOf` / `oneOf` with a null variant → single type + `nullable: true`
- `anyOf` / `oneOf` without null → picks the first variant
- `allOf` is likewise flattened into a single schema
- `type` as array (e.g. `["string", "null"]`) → single type + `nullable: true`

Applied in both `kernel_function_metadata_to_google_ai_function_call_format()` and `kernel_function_metadata_to_vertex_ai_function_call_format()`.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
- [x] The existing tests pass, and I have added 12 new regression tests
- [x] I didn't break any existing functionality

> [!NOTE]
> AI-assisted contribution: Claude helped with code review, test generation, and formatting.

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Evan Mattson <35585003+moonbox3@users.noreply.github.com>
1 parent 83ff0a5 commit 41f7b59

File tree

6 files changed

+340
-10
lines changed

6 files changed

+340
-10
lines changed

python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from semantic_kernel.connectors.ai.google.shared_utils import (
1414
FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE,
1515
GEMINI_FUNCTION_NAME_SEPARATOR,
16+
sanitize_schema_for_google_ai,
1617
)
1718
from semantic_kernel.contents.chat_message_content import ChatMessageContent
1819
from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -147,16 +148,23 @@ def format_tool_message(message: ChatMessageContent) -> list[Part]:
147148

148149
def kernel_function_metadata_to_google_ai_function_call_format(metadata: KernelFunctionMetadata) -> dict[str, Any]:
    """Build the Google AI function declaration dict for a kernel function.

    Parameters without a name are left out. Every remaining parameter schema
    is passed through ``sanitize_schema_for_google_ai`` when it is non-empty.
    The ``parameters`` entry is ``None`` when the function declares no
    parameters at all.
    """
    parameters: dict[str, Any] | None = None
    if metadata.parameters:
        # Unnamed parameters cannot be addressed by the model, so drop them up front.
        named = [p for p in metadata.parameters if p.name is not None]
        parameters = {
            "type": "object",
            "properties": {
                p.name: (sanitize_schema_for_google_ai(p.schema_data) if p.schema_data else p.schema_data)
                for p in named
            },
            "required": [p.name for p in named if p.is_required],
        }

    return {
        "name": metadata.custom_fully_qualified_name(GEMINI_FUNCTION_NAME_SEPARATOR),
        "description": metadata.description or "",
        "parameters": parameters,
    }
161169

162170

python/semantic_kernel/connectors/ai/google/shared_utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import logging
4+
from copy import deepcopy
5+
from typing import Any
46

57
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
68
from semantic_kernel.const import DEFAULT_FULLY_QUALIFIED_NAME_SEPARATOR
@@ -51,6 +53,64 @@ def format_gemini_function_name_to_kernel_function_fully_qualified_name(gemini_f
5153
return gemini_function_name
5254

5355

56+
def sanitize_schema_for_google_ai(schema: dict[str, Any] | None) -> dict[str, Any] | None:
57+
"""Sanitize a JSON schema dict so it is compatible with Google AI / Vertex AI.
58+
59+
The Google AI protobuf ``Schema`` does not support ``anyOf``, ``oneOf``, or
60+
``allOf``. It also does not accept ``type`` as an array (e.g.
61+
``["string", "null"]``). This helper recursively rewrites those constructs
62+
into the subset that Google AI understands, using ``nullable`` where
63+
appropriate.
64+
"""
65+
if schema is None:
66+
return None
67+
68+
schema = deepcopy(schema)
69+
return _sanitize_node(schema)
70+
71+
72+
def _sanitize_node(node: dict[str, Any]) -> dict[str, Any]:
73+
"""Recursively sanitize a single schema node."""
74+
# --- handle ``type`` given as a list (e.g. ["string", "null"]) ---
75+
type_val = node.get("type")
76+
if isinstance(type_val, list):
77+
non_null = [t for t in type_val if t != "null"]
78+
if len(type_val) != len(non_null):
79+
node["nullable"] = True
80+
node["type"] = non_null[0] if non_null else "string"
81+
82+
# --- handle ``anyOf`` / ``oneOf`` / ``allOf`` ---
83+
for key in ("anyOf", "oneOf", "allOf"):
84+
variants = node.get(key)
85+
if not variants:
86+
continue
87+
non_null = [v for v in variants if v.get("type") != "null"]
88+
has_null = len(variants) != len(non_null)
89+
chosen = _sanitize_node(non_null[0]) if non_null else {"type": "string"}
90+
# Preserve description from the outer node
91+
desc = node.get("description")
92+
node.clear()
93+
node.update(chosen)
94+
if has_null:
95+
node["nullable"] = True
96+
if desc and "description" not in node:
97+
node["description"] = desc
98+
break # only process the first matching key
99+
100+
# --- recurse into nested structures ---
101+
props = node.get("properties")
102+
if isinstance(props, dict):
103+
for prop_name, prop_schema in props.items():
104+
if isinstance(prop_schema, dict):
105+
props[prop_name] = _sanitize_node(prop_schema)
106+
107+
items = node.get("items")
108+
if isinstance(items, dict):
109+
node["items"] = _sanitize_node(items)
110+
111+
return node
112+
113+
54114
def collapse_function_call_results_in_chat_history(chat_history: ChatHistory):
55115
"""The Gemini API expects the results of parallel function calls to be contained in a single message to be returned.
56116

python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from semantic_kernel.connectors.ai.google.shared_utils import (
1212
FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE,
1313
GEMINI_FUNCTION_NAME_SEPARATOR,
14+
sanitize_schema_for_google_ai,
1415
)
1516
from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_prompt_execution_settings import (
1617
VertexAIChatPromptExecutionSettings,
@@ -137,13 +138,20 @@ def format_tool_message(message: ChatMessageContent) -> list[Part]:
137138

138139
def kernel_function_metadata_to_vertex_ai_function_call_format(metadata: KernelFunctionMetadata) -> FunctionDeclaration:
    """Build the Vertex AI ``FunctionDeclaration`` for a kernel function.

    Parameters without a name are left out. Every remaining parameter schema
    is passed through ``sanitize_schema_for_google_ai`` when it is non-empty.
    """
    declared = metadata.parameters
    # Unnamed parameters cannot be addressed by the model, so drop them up front.
    named = [p for p in declared if p.name is not None] if declared else []
    properties: dict[str, Any] = {
        p.name: (sanitize_schema_for_google_ai(p.schema_data) if p.schema_data else p.schema_data) for p in named
    }

    return FunctionDeclaration(
        name=metadata.custom_fully_qualified_name(GEMINI_FUNCTION_NAME_SEPARATOR),
        description=metadata.description or "",
        parameters={
            "type": "object",
            "properties": properties,
            "required": [p.name for p in metadata.parameters if p.is_required and p.name is not None],
        },
    )
149157

python/tests/unit/connectors/ai/google/google_ai/services/test_google_ai_utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
finish_reason_from_google_ai_to_semantic_kernel,
88
format_assistant_message,
99
format_user_message,
10+
kernel_function_metadata_to_google_ai_function_call_format,
1011
)
1112
from semantic_kernel.contents.chat_message_content import ChatMessageContent
1213
from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -16,6 +17,8 @@
1617
from semantic_kernel.contents.utils.author_role import AuthorRole
1718
from semantic_kernel.contents.utils.finish_reason import FinishReason as SemanticKernelFinishReason
1819
from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError
20+
from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata
21+
from semantic_kernel.functions.kernel_parameter_metadata import KernelParameterMetadata
1922

2023

2124
def test_finish_reason_from_google_ai_to_semantic_kernel():
@@ -157,3 +160,42 @@ def test_format_assistant_message_without_thought_signature() -> None:
157160
assert formatted[0].function_call.name == "test_function"
158161
assert formatted[0].function_call.args == {"arg1": "value1"}
159162
assert not getattr(formatted[0], "thought_signature", None)
163+
164+
165+
def test_google_ai_function_call_format_sanitizes_anyof_schema() -> None:
    """An ``anyOf`` in a parameter's schema_data must not survive into the output dict."""
    union_schema = {
        "anyOf": [
            {"type": "string"},
            {"type": "array", "items": {"type": "string"}},
        ],
        "description": "The user messages",
    }
    messages_param = KernelParameterMetadata(
        name="messages",
        description="The user messages",
        is_required=True,
        schema_data=union_schema,
    )
    metadata = KernelFunctionMetadata(
        name="test_func",
        description="A test function",
        is_prompt=False,
        parameters=[messages_param],
    )

    declaration = kernel_function_metadata_to_google_ai_function_call_format(metadata)

    messages_schema = declaration["parameters"]["properties"]["messages"]
    assert "anyOf" not in messages_schema
    assert messages_schema["type"] == "string"
190+
191+
192+
def test_google_ai_function_call_format_empty_parameters() -> None:
    """A function that declares no parameters yields ``parameters=None``."""
    declaration = kernel_function_metadata_to_google_ai_function_call_format(
        KernelFunctionMetadata(
            name="no_params_func",
            description="No parameters",
            is_prompt=False,
            parameters=[],
        )
    )
    assert declaration["parameters"] is None

python/tests/unit/connectors/ai/google/test_shared_utils.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
collapse_function_call_results_in_chat_history,
1111
filter_system_message,
1212
format_gemini_function_name_to_kernel_function_fully_qualified_name,
13+
sanitize_schema_for_google_ai,
1314
)
1415
from semantic_kernel.contents.chat_history import ChatHistory
1516
from semantic_kernel.contents.chat_message_content import ChatMessageContent
@@ -94,3 +95,174 @@ def test_collapse_function_call_results_in_chat_history() -> None:
9495
collapse_function_call_results_in_chat_history(chat_history)
9596
assert len(chat_history.messages) == 7
9697
assert len(chat_history.messages[1].items) == 2
98+
99+
100+
# --- sanitize_schema_for_google_ai tests ---
101+
102+
103+
def test_sanitize_schema_none():
    """A ``None`` schema comes back as ``None``."""
    sanitized = sanitize_schema_for_google_ai(None)
    assert sanitized is None
106+
107+
108+
def test_sanitize_schema_simple_passthrough():
    """A schema with nothing to rewrite comes back equal to the input."""
    expected = {"type": "string", "description": "A name"}
    sanitized = sanitize_schema_for_google_ai({"type": "string", "description": "A name"})
    assert sanitized == expected
113+
114+
115+
def test_sanitize_schema_type_as_list_with_null():
    """``type: ["string", "null"]`` collapses to ``type: "string"`` plus ``nullable: true``."""
    sanitized = sanitize_schema_for_google_ai({"type": ["string", "null"], "description": "Optional field"})
    expected = {"type": "string", "nullable": True, "description": "Optional field"}
    assert sanitized == expected
120+
121+
122+
def test_sanitize_schema_type_as_list_without_null():
    """``type: ["string", "integer"]`` keeps only the first listed type."""
    sanitized = sanitize_schema_for_google_ai({"type": ["string", "integer"]})
    assert sanitized == {"type": "string"}
127+
128+
129+
def test_sanitize_schema_anyof_with_null():
    """An ``anyOf`` containing a null variant becomes the non-null type marked nullable."""
    optional_schema = {
        "anyOf": [{"type": "string"}, {"type": "null"}],
        "description": "Optional param",
    }
    expected = {"type": "string", "nullable": True, "description": "Optional param"}
    assert sanitize_schema_for_google_ai(optional_schema) == expected
137+
138+
139+
def test_sanitize_schema_anyof_without_null():
    """An ``anyOf`` with no null variant resolves to its first variant."""
    string_variant = {"type": "string"}
    array_variant = {"type": "array", "items": {"type": "string"}}
    sanitized = sanitize_schema_for_google_ai({"anyOf": [string_variant, array_variant]})
    assert sanitized == {"type": "string"}
149+
150+
151+
def test_sanitize_schema_oneof():
    """``oneOf`` is rewritten exactly like ``anyOf``."""
    sanitized = sanitize_schema_for_google_ai({"oneOf": [{"type": "integer"}, {"type": "null"}]})
    assert sanitized == {"type": "integer", "nullable": True}
158+
159+
160+
def test_sanitize_schema_nested_properties():
    """An ``anyOf`` nested under ``properties`` is rewritten by the recursion."""
    object_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "value": {"anyOf": [{"type": "number"}, {"type": "null"}]},
        },
    }
    expected_properties = {
        "name": {"type": "string"},
        "value": {"type": "number", "nullable": True},
    }
    sanitized = sanitize_schema_for_google_ai(object_schema)
    assert sanitized == {"type": "object", "properties": expected_properties}
177+
178+
179+
def test_sanitize_schema_nested_items():
    """An ``anyOf`` nested under array ``items`` is rewritten by the recursion."""
    item_union = {"anyOf": [{"type": "string"}, {"type": "integer"}]}
    sanitized = sanitize_schema_for_google_ai({"type": "array", "items": item_union})
    assert sanitized == {"type": "array", "items": {"type": "string"}}
190+
191+
192+
def test_sanitize_schema_does_not_mutate_original():
    """Sanitizing must leave the caller's schema dict untouched."""

    def build() -> dict:
        # Fresh, independent literal each call so the snapshot shares no objects with the input.
        return {"anyOf": [{"type": "string"}, {"type": "null"}], "description": "test"}

    schema = build()
    snapshot = build()
    sanitize_schema_for_google_ai(schema)
    assert schema == snapshot
201+
202+
203+
def test_sanitize_schema_agent_messages_param():
    """Regression for issue #12442: the schema produced for a ``str | list[str]`` parameter."""
    description = "The user messages for the agent."
    messages_schema = {
        "anyOf": [
            {"type": "string"},
            {"type": "array", "items": {"type": "string"}},
        ],
        "description": description,
    }
    sanitized = sanitize_schema_for_google_ai(messages_schema)
    assert "anyOf" not in sanitized
    assert sanitized["type"] == "string"
    assert sanitized["description"] == description
216+
217+
218+
def test_sanitize_schema_allof():
    """``allOf`` is flattened away; the result keeps the first variant's type and properties."""
    name_part = {"type": "object", "properties": {"name": {"type": "string"}}}
    age_part = {"type": "object", "properties": {"age": {"type": "integer"}}}
    sanitized = sanitize_schema_for_google_ai({"allOf": [name_part, age_part]})
    assert "allOf" not in sanitized
    assert sanitized["type"] == "object"
    assert "name" in sanitized["properties"]
230+
231+
232+
def test_sanitize_schema_allof_with_null():
    """A null variant inside ``allOf`` turns into ``nullable: true``."""
    sanitized = sanitize_schema_for_google_ai({"allOf": [{"type": "string"}, {"type": "null"}]})
    assert "allOf" not in sanitized
    assert sanitized["type"] == "string"
    assert sanitized["nullable"] is True
241+
242+
243+
def test_sanitize_schema_all_null_type_list():
    """``type: ["null"]`` falls back to a nullable string."""
    sanitized = sanitize_schema_for_google_ai({"type": ["null"]})
    assert sanitized == {"type": "string", "nullable": True}
248+
249+
250+
def test_sanitize_schema_all_null_anyof():
    """An ``anyOf`` made up solely of null variants falls back to a nullable string."""
    sanitized = sanitize_schema_for_google_ai({"anyOf": [{"type": "null"}]})
    assert sanitized == {"type": "string", "nullable": True}
255+
256+
257+
def test_sanitize_schema_chosen_variant_keeps_own_description():
    """The chosen variant's own description wins over the outer node's description."""
    described_variant = {"type": "string", "description": "inner desc"}
    sanitized = sanitize_schema_for_google_ai(
        {
            "anyOf": [described_variant, {"type": "null"}],
            "description": "outer desc",
        }
    )
    assert sanitized["description"] == "inner desc"
    assert sanitized["nullable"] is True

0 commit comments

Comments
 (0)