Closed
Description
Expected Behavior
Using LlamaGrammar.from_json_schema with a json schema that includes "oneOf"/"anyOf" should work.
Current Behavior
Assert triggered in line 1436 of llama_grammar.py - for the example below:
AssertionError: Unrecognized schema: {'anyOf': [{'description': 'Unit for temperature', 'enum': ['celsius', 'fahrenheit'], 'type': 'string'}, {'type': 'null'}]}
Environment and Context
Linux, python 3.11.6, llama-cpp-python v0.2.26.
Steps to Reproduce
Run the following:
import json
from llama_cpp import llama_grammar
# The 'unit' property is described only through an 'anyOf' list (an enumerated
# string or null); the property itself carries no 'type' key.
unit_alternatives = [
    {
        'description': 'Unit for temperature',
        'enum': ['celsius', 'fahrenheit'],
        'type': 'string',
    },
    {'type': 'null'},
]
sch = {
    'properties': {
        'temperature': {
            'description': 'The temperature mentioned',
            'type': 'number',
        },
        'unit': {'anyOf': unit_alternatives},
    },
    'type': 'object',
}
# Building the grammar from the serialized schema is the call under test.
g = llama_grammar.LlamaGrammar.from_json_schema(json.dumps(sch))
Triggers an assert in line 1436 of llama_grammar.py:
schema_type: Optional[str] = schema.get("type") # type: ignore
assert isinstance(schema_type, str), f"Unrecognized schema: {schema}"
This happens because the 'anyOf' entry has no 'type' key of its own, which is valid JSON Schema.
Replacing the SchemaConverter.visit() method with the following code fixes the problem, as far as I could test:
def visit(self,
          schema: dict[str, Any],
          name: str) -> str:
    """Translate ``schema`` into grammar rules and return the rule's name.

    Keywords are checked in this order: ``oneOf``/``anyOf``, ``const``,
    ``enum``, ``$ref``, and only then ``type``.  The ``type`` lookup is done
    last on purpose, so that schemas which describe a value purely through
    ``anyOf``/``oneOf``/``const``/``enum``/``$ref`` (and therefore have no
    ``type`` key) are accepted.

    Args:
        schema: The (sub-)schema to convert.
        name: Name prefix for the generated rule; an empty string means the
            rule is registered as ``"root"``.

    Returns:
        The value returned by ``self._add_rule`` for the rule registered for
        this schema (or, for ``$ref``, the result of visiting the referenced
        definition).

    Raises:
        AssertionError: If the schema matches none of the supported forms,
            or a ``$ref`` does not start with ``#/$defs/``.
        KeyError: If a ``$ref`` names a definition that was never registered
            in ``self._defs``.
    """
    rule_name = name or "root"

    def child_name(suffix: object) -> str:
        # Nested rules are named "<name>-<suffix>"; at the root (empty name)
        # the suffix is used on its own.
        return f'{name}{"-" if name else ""}{suffix}'

    def unrecognized() -> AssertionError:
        # Raised explicitly instead of via `assert` so the validation still
        # runs under `python -O`; the exception type and message are the same
        # as the assert statements this replaces.
        return AssertionError(f"Unrecognized schema: {schema}")

    if "$defs" in schema:
        # add defs to self._defs for later inlining
        for def_name, def_schema in schema["$defs"].items():
            self._defs[def_name] = def_schema

    if "oneOf" in schema or "anyOf" in schema:
        # Prefer a non-empty "oneOf", otherwise fall back to "anyOf".  The
        # trailing `or []` avoids a bare KeyError when "oneOf" is empty and
        # "anyOf" is missing; that case now yields an empty rule, exactly as
        # an empty "anyOf" already did.
        alternatives = schema.get("oneOf") or schema.get("anyOf") or []
        rule = " | ".join(
            [
                self.visit(alt_schema, child_name(i))
                for i, alt_schema in enumerate(alternatives)
            ]
        )
        return self._add_rule(rule_name, rule)

    if "const" in schema:
        return self._add_rule(rule_name, self._format_literal(schema["const"]))

    if "enum" in schema:
        rule = " | ".join([self._format_literal(v) for v in schema["enum"]])
        return self._add_rule(rule_name, rule)

    if "$ref" in schema:
        ref = schema["$ref"]
        if not ref.startswith("#/$defs/"):
            raise unrecognized()
        # inline $defs
        def_name = ref[len("#/$defs/"):]
        def_schema = self._defs[def_name]
        return self.visit(def_schema, child_name(def_name))

    # Only the remaining forms are keyed on "type", so it is looked up here
    # rather than at the top of the method.
    schema_type: Optional[str] = schema.get("type")  # type: ignore
    if not isinstance(schema_type, str):
        raise unrecognized()

    if schema_type == "object" and "properties" in schema:
        # TODO: `required` keyword
        prop_order = self._prop_order
        prop_pairs = sorted(
            schema["properties"].items(),
            # sort by position in prop_order (if specified) then by key
            key=lambda kv: (prop_order.get(kv[0], len(prop_order)), kv[0]),
        )

        rule = '"{" space'
        for i, (prop_name, prop_schema) in enumerate(prop_pairs):
            prop_rule_name = self.visit(prop_schema, child_name(prop_name))
            if i > 0:
                rule += ' "," space'
            rule += rf' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
        rule += ' "}" space'
        return self._add_rule(rule_name, rule)

    if schema_type == "array" and "items" in schema:
        # TODO `prefixItems` keyword
        item_rule_name = self.visit(schema["items"], child_name("item"))
        rule = (
            f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space'
        )
        return self._add_rule(rule_name, rule)

    if schema_type not in PRIMITIVE_RULES:
        raise unrecognized()
    # Primitive rules are registered under the type name itself, except at
    # the root, where the rule must be called "root".
    return self._add_rule(
        "root" if rule_name == "root" else schema_type,
        PRIMITIVE_RULES[schema_type],
    )
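The only change is that the schema_type lookup and its assert were moved down, below the "oneOf"/"anyOf", "const", "enum" and "$ref" branches, to just before schema_type is first needed (the "object" check), so schemas without a 'type' key are handled by those branches before the assert can fire.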
Metadata
Metadata
Assignees
Labels
No labels