Skip to content

Commit 00abf1b

Browse files
authored
fix(qwen3.5): add qwen3.5 preset and mimic llama.cpp's PEG (#8668)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 959458f commit 00abf1b

File tree

2 files changed

+157
-8
lines changed

2 files changed

+157
-8
lines changed

pkg/functions/parse.go

Lines changed: 121 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,20 @@ func getAllXMLFormats() []xmlFormatPreset {
375375
TrimRawArgVal: true,
376376
},
377377
},
378+
{
379+
name: "qwen3.5",
380+
format: &XMLToolCallFormat{
381+
ScopeStart: "<tool_call>",
382+
ToolStart: "<function=",
383+
ToolSep: ">",
384+
KeyStart: "<parameter=",
385+
KeyValSep: ">",
386+
ValEnd: "</parameter>",
387+
ToolEnd: "</function>",
388+
ScopeEnd: "</tool_call>",
389+
TrimRawArgVal: true,
390+
},
391+
},
378392
{
379393
name: "glm-4.5",
380394
format: &XMLToolCallFormat{
@@ -483,9 +497,70 @@ func ParseXML(s string, format *XMLToolCallFormat) ([]FuncCallResults, error) {
483497
return parseXMLWithFormat(s, format)
484498
}
485499

500+
// getScopeOrToolStart returns the string to search for to start the tool-calls section
501+
// (ScopeStart if set, else ToolStart). Used to mimic llama.cpp's "content until <tool_call>" order.
502+
func getScopeOrToolStart(format *XMLToolCallFormat) string {
503+
if format == nil {
504+
return ""
505+
}
506+
if format.ScopeStart != "" {
507+
return format.ScopeStart
508+
}
509+
return format.ToolStart
510+
}
511+
512+
// tryParseXMLFromScopeStart finds the first occurrence of scopeStart (or format.ToolStart),
513+
// splits the input there, and parses only the suffix as XML tool calls. Returns (toolCalls, true)
514+
// if any tool calls were parsed, else (nil, false). This mimics llama.cpp's PEG order so that
515+
// reasoning or content before the tool block does not cause "whitespace only before scope" to fail.
516+
func tryParseXMLFromScopeStart(s string, format *XMLToolCallFormat, isPartial bool) ([]FuncCallResults, bool) {
517+
if format == nil {
518+
return nil, false
519+
}
520+
scopeStart := getScopeOrToolStart(format)
521+
if scopeStart == "" {
522+
return nil, false
523+
}
524+
idx := strings.Index(s, scopeStart)
525+
if idx < 0 {
526+
return nil, false
527+
}
528+
toolCallsPart := s[idx:]
529+
parser := NewChatMsgParser(toolCallsPart, isPartial)
530+
success, err := parser.TryConsumeXMLToolCalls(format)
531+
if err != nil {
532+
if _, ok := err.(*ChatMsgPartialException); ok && isPartial {
533+
return parser.ToolCalls(), len(parser.ToolCalls()) > 0
534+
}
535+
return nil, false
536+
}
537+
if success && len(parser.ToolCalls()) > 0 {
538+
return parser.ToolCalls(), true
539+
}
540+
return nil, false
541+
}
542+
486543
// ParseXMLIterative parses XML tool calls using the iterative parser
487-
// This provides better streaming and partial parsing support
544+
// This provides better streaming and partial parsing support.
545+
// Whether format is set or nil (in which case each preset is tried in turn), it first tries "find scope/tool start, split, parse suffix"
546+
// first (llama.cpp PEG order) so that content before the tool block does not cause parse failure.
488547
func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]FuncCallResults, error) {
548+
// Try split-on-scope first so reasoning/content before tool block is skipped
549+
if format != nil {
550+
if results, ok := tryParseXMLFromScopeStart(s, format, isPartial); ok {
551+
return results, nil
552+
}
553+
} else {
554+
formats := getAllXMLFormats()
555+
for _, fmtPreset := range formats {
556+
if fmtPreset.format != nil {
557+
if results, ok := tryParseXMLFromScopeStart(s, fmtPreset.format, isPartial); ok {
558+
return results, nil
559+
}
560+
}
561+
}
562+
}
563+
489564
parser := NewChatMsgParser(s, isPartial)
490565

491566
// Auto-detect format if not provided
@@ -1621,16 +1696,54 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
16211696
// but we've already parsed it, so we shouldn't try XML parsing on the same content
16221697
skipXMLParsing := (len(functionConfig.JSONRegexMatch) > 0 || len(functionConfig.ResponseRegex) > 0) && len(results) > 0
16231698
if len(results) == 0 && !skipXMLParsing {
1624-
xmlResults, err := ParseXML(llmresult, xmlFormat)
1625-
if err == nil && len(xmlResults) > 0 {
1626-
xlog.Debug("Found XML tool calls", "count", len(xmlResults))
1627-
results = append(results, xmlResults...)
1699+
// Mimic llama.cpp PEG order: try "find scope/tool start, split, parse suffix" first so that
1700+
// reasoning or content before the tool block (e.g. <think>...</think>) does not cause parse failure.
1701+
if xmlFormat != nil {
1702+
if xmlResults, ok := tryParseXMLFromScopeStart(llmresult, xmlFormat, false); ok {
1703+
xlog.Debug("Found XML tool calls (split-on-scope)", "count", len(xmlResults))
1704+
results = append(results, xmlResults...)
1705+
}
1706+
} else {
1707+
formats := getAllXMLFormats()
1708+
for _, fmtPreset := range formats {
1709+
if fmtPreset.format != nil {
1710+
if xmlResults, ok := tryParseXMLFromScopeStart(llmresult, fmtPreset.format, false); ok {
1711+
xlog.Debug("Found XML tool calls (split-on-scope, auto-detect)", "format", fmtPreset.name, "count", len(xmlResults))
1712+
results = append(results, xmlResults...)
1713+
break
1714+
}
1715+
}
1716+
}
1717+
}
1718+
if len(results) == 0 {
1719+
xmlResults, err := ParseXML(llmresult, xmlFormat)
1720+
if err == nil && len(xmlResults) > 0 {
1721+
xlog.Debug("Found XML tool calls", "count", len(xmlResults))
1722+
results = append(results, xmlResults...)
1723+
}
16281724
}
16291725
} else if len(results) > 0 && !skipXMLParsing {
16301726
// Even if we found JSON results, check for XML tool calls in the response
1631-
// This handles mixed content scenarios (text + JSON + XML)
1632-
// But skip if JSONRegexMatch or ResponseRegex was used (they already extracted the content)
1633-
xmlResults, err := ParseXML(llmresult, xmlFormat)
1727+
// Try split-on-scope first (llama.cpp order), then full ParseXML
1728+
var xmlResults []FuncCallResults
1729+
var err error
1730+
if xmlFormat != nil {
1731+
xmlResults, _ = tryParseXMLFromScopeStart(llmresult, xmlFormat, false)
1732+
}
1733+
if len(xmlResults) == 0 && xmlFormat == nil {
1734+
formats := getAllXMLFormats()
1735+
for _, fmtPreset := range formats {
1736+
if fmtPreset.format != nil {
1737+
xmlResults, _ = tryParseXMLFromScopeStart(llmresult, fmtPreset.format, false)
1738+
if len(xmlResults) > 0 {
1739+
break
1740+
}
1741+
}
1742+
}
1743+
}
1744+
if len(xmlResults) == 0 {
1745+
xmlResults, err = ParseXML(llmresult, xmlFormat)
1746+
}
16341747
if err == nil && len(xmlResults) > 0 {
16351748
// Check if JSON is inside XML tags, if so, skip it
16361749
for _, result := range xmlResults {

pkg/functions/parse_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,42 @@ value
779779
Expect(results[0].Name).To(Equal("glob"))
780780
Expect(results[0].Arguments).To(Equal(`{"pattern":"**/package.json"}`))
781781
})
782+
It("should parse tool calls when reasoning (<think>) precedes tool block (Qwen3.5-style)", func() {
783+
input := `<think>
784+
I need to run a command.
785+
</think>
786+
<tool_call>
787+
<function=bash>
788+
<parameter=script>
789+
echo hello
790+
</parameter>
791+
</function>
792+
</tool_call>`
793+
cfg := FunctionsConfig{}
794+
results := ParseFunctionCall(input, cfg)
795+
Expect(results).To(HaveLen(1))
796+
Expect(results[0].Name).To(Equal("bash"))
797+
Expect(results[0].Arguments).To(ContainSubstring("echo hello"))
798+
})
799+
800+
It("should parse tool calls when reasoning (<think>) precedes tool block (explicit qwen3.5 preset)", func() {
	// Same payload as the spec without a preset, but here the "qwen3.5" XML
	// format preset is selected explicitly through the config. The description
	// is distinct so failures and focus filters can tell the two specs apart.
	input := `<think>
I need to run a command.
</think>
<tool_call>
<function=bash>
<parameter=script>
echo hello
</parameter>
</function>
</tool_call>`
	cfg := FunctionsConfig{}
	cfg.XMLFormatPreset = "qwen3.5"

	results := ParseFunctionCall(input, cfg)

	Expect(results).To(HaveLen(1))
	Expect(results[0].Name).To(Equal("bash"))
	Expect(results[0].Arguments).To(ContainSubstring("echo hello"))
})
782818

783819
It("should parse XML tool calls alongside JSON tool calls", func() {
784820
input := `{"name": "add", "arguments": {"x": 5, "y": 3}}

0 commit comments

Comments
 (0)