From a46418fa27a9bfa8fc9e4de80ea446aff286763f Mon Sep 17 00:00:00 2001
From: Jeremiah Blanchard <jjb@eng.ufl.edu>
Date: Sun, 19 Apr 2026 04:06:29 +0000
Subject: [PATCH] chat : add MiniMax M2 specialized tool-call handler

The autoparser (peg-native) infers a grammar from the MiniMax-M2
template that handles a single <invoke> element cleanly but
mis-specifies the repetition rule for multiple <invoke> elements
inside one <minimax:tool_call> wrapper. Parallel tool calls with
the generic path trip the streaming parser's self-consistency check
("Invalid diff: now finding less tool calls!"), which is the
test-harness analogue of the production GGML_ABORT at
llama-grammar.cpp:1435 on real MiniMax M2.7 output.

Add a specialized handler following the Kimi K2 pattern: XML
invoke/parameter parsing, lazy grammar gated by <minimax:tool_call>
trigger, reasoning extraction via <think>/</think>. Dispatch
requires three MiniMax-specific literals in the template source
(<minimax:tool_call>, <invoke name=, <parameter name=) so any
future variant that drops the XML idiom falls through to the
autoparser.

Include five test fixtures in tests/test-chat.cpp: parallel calls
with different tools, parallel calls with the same tool (both
reproduce the gap), a string parameter with embedded XML-ish content,
a multi-line string value, and a two-integer-parameter invocation.
The latter three already pass on master; they document that the
autoparser's gap is specifically repetition, not content shape.
---
 common/chat.cpp     | 132 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test-chat.cpp |  68 +++++++++++++++++++++++
 2 files changed, 200 insertions(+)
diff --git a/common/chat.cpp b/common/chat.cpp
index e27b6c3413c9..7661828f9c6a 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -2041,6 +2041,130 @@ static void func_args_not_string(json & messages) {
 
 }
 
+// Specialized chat handler for the MiniMax M2 template family.
+// Expected output: [<think>{reasoning}</think>]{content}<minimax:tool_call>...</minimax:tool_call>, where the
+// wrapper holds <invoke name="tool_name"><parameter name="key">value</parameter>...</invoke> elements.
+// tmpl   - chat template used to render the prompt.
+// inputs - generation parameters (tools, tool_choice, parallel_tool_calls, reasoning_format, generation_prompt).
+// Returns the rendered prompt, the saved PEG parser and, when tools are enabled, a grammar (lazy for
+// tool_choice == auto) triggered by the <minimax:tool_call> opening tag.
+static common_chat_params common_chat_params_init_minimax(const common_chat_template &    tmpl,
+                                                           const autoparser::generation_params & inputs) {
+    const std::string TC_START    = "<minimax:tool_call>";
+    const std::string TC_END      = "</minimax:tool_call>";
+    const std::string THINK_START = "<think>";
+    const std::string THINK_END   = "</think>";
+
+    common_chat_params data;
+
+    data.prompt             = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format             = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking  = true;
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag   = THINK_END;
+    data.preserved_tokens   = { TC_START, TC_END, THINK_START, THINK_END };
+
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto end = p.end();
+
+        // Reasoning extraction (same pattern as Kimi K2); the closing </think> is optional
+        auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning(
+            p.until_one_of({ THINK_END, TC_START })) +
+            p.optional(p.literal(THINK_END))) : p.eps();
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+
+        // Content only parser (no tools, or tool_choice == none)
+        if (!include_grammar) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
+        }
+
+        // Build one tool call parser per available function. MiniMax format:
+        // <invoke name="tool_name"><parameter name="key">value</parameter>...</invoke>
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & params   = function.at("parameters");
+
+            // Build parameter parsers
+            auto arg_choice = p.choice();
+            if (params.contains("properties") && !params["properties"].empty()) {
+                for (const auto & el : params["properties"].items()) {
+                    const std::string & prop_name = el.key();
+                    const auto & prop_def = el.value();
+                    bool is_string_type = (prop_def.contains("type") && prop_def["type"] == "string");
+
+                    // <parameter name="prop_name">value</parameter>
+                    auto arg_rule = p.tool_arg(
+                        p.tool_arg_open(p.literal("<parameter name=\"")) +
+                        p.tool_arg_name(p.literal(prop_name)) +
+                        p.literal("\">") +
+                        (is_string_type
+                            ? p.tool_arg_string_value(p.until("</parameter>"))
+                            : p.tool_arg_value(p.until("</parameter>"))) +
+                        p.tool_arg_close(p.literal("</parameter>"))
+                    );
+                    arg_choice |= arg_rule;
+                }
+            }
+            auto args = p.zero_or_more(p.space() + arg_choice);
+
+            // <invoke name="tool_name">...params...</invoke>
+            auto tool_parser = p.tool(
+                p.tool_open(
+                    p.literal("<invoke name=\"") +
+                    p.tool_name(p.literal(name)) +
+                    p.literal("\">")
+                ) +
+                p.tool_args(args) +
+                p.space() +
+                p.tool_close(p.literal("</invoke>"))
+            );
+
+            tool_choice |= p.rule("tool-" + name, tool_parser);
+        });
+
+        // Tool calls section: <minimax:tool_call>\n...tool calls...\n</minimax:tool_call>
+        auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+        auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
+        auto tool_calls = p.rule("tool-calls",
+            p.trigger_rule("tool-call",
+                p.literal(TC_START) + p.space() +
+                p.repeat(tool_choice + p.space(), min_calls, max_calls) +
+                p.optional(p.literal(TC_END)))
+        );
+
+        auto content_before_tools = p.content(p.until_one_of({ TC_START }));
+
+        // The wrapper's opening tag is a mandatory literal inside tool_calls, so the section must be
+        // optional unless a call is required; otherwise a plain-text reply (no tool call) cannot match.
+        auto tool_section = min_calls > 0 ? tool_calls : p.optional(tool_calls);
+        return generation_prompt + reasoning + content_before_tools + tool_section + end;
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        data.grammar_triggers = { { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, TC_START } };
+    }
+
+    return data;
+}
+
 static json common_chat_extra_context() {
     json ctx = json::object();
     std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
@@ -2116,6 +2240,14 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
         return common_chat_params_init_deepseek_v3_2(tmpl, params);
     }
 
+    // MiniMax M2 format detection
+    if (src.find("<minimax:tool_call>") != std::string::npos &&
+        src.find("<invoke name=") != std::string::npos &&
+        src.find("<parameter name=") != std::string::npos) {
+        LOG_DBG("Using specialized template: MiniMax M2\n");
+        return common_chat_params_init_minimax(tmpl, params);
+    }
+
     // Gemma4 format detection
     if (src.find("'<|tool_call>call:'") != std::string::npos) {
         if (src.find("{#- OpenAI Chat Completions:") == std::string::npos) {
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 3b8de5ce02e5..692182940b05 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -3448,6 +3448,74 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             .expect(message_assist_call)
             .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
             .run();
+
+        // Parallel tool calls (two different tools) inside one wrapper
+        tst.test(
+               "</think><minimax:tool_call>\n"
+               "<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n"
+               "<invoke name=\"python\">\n<parameter name=\"code\">print('hey')</parameter>\n</invoke>\n"
+               "</minimax:tool_call>")
+            .tools({ special_function_tool, python_tool })
+            .parallel_tool_calls(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "", "",
+                {
+                    { "special_function", R"x({"arg1": 1})x" },
+                    { "python", R"x({"code": "print('hey')"})x" },
+                }))
+            .run();
+
+        // String parameter with embedded XML-ish content
+        tst.test(
+               "</think><minimax:tool_call>\n<invoke name=\"html\">\n"
+               "<parameter name=\"markup\"><div><script>alert('x')</script></div></parameter>\n"
+               "</invoke>\n</minimax:tool_call>")
+            .tools({ html_tool })
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_tool_calls(
+                "html", R"x({"markup": "<div><script>alert('x')</script></div>"})x"))
+            .run();
+
+        // Multi-line string parameter value
+        tst.test(
+               "</think><minimax:tool_call>\n<invoke name=\"python\">\n"
+               "<parameter name=\"code\">import os\nfor k in os.environ:\n    print(k)</parameter>\n"
+               "</invoke>\n</minimax:tool_call>")
+            .tools({ python_tool })
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_tool_calls(
+                "python", R"x({"code": "import os\nfor k in os.environ:\n    print(k)"})x"))
+            .run();
+
+        // Tool with two integer parameters
+        tst.test(
+               "</think><minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n"
+               "<parameter name=\"arg1\">1</parameter>\n"
+               "<parameter name=\"arg2\">42</parameter>\n"
+               "</invoke>\n</minimax:tool_call>")
+            .tools({ special_function_tool_with_optional_param })
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_tool_calls(
+                "special_function_with_opt", R"x({"arg1": 1, "arg2": 42})x"))
+            .run();
+
+        // Parallel calls to the same tool inside one wrapper
+        tst.test(
+               "</think><minimax:tool_call>\n"
+               "<invoke name=\"python\">\n<parameter name=\"code\">print('a')</parameter>\n</invoke>\n"
+               "<invoke name=\"python\">\n<parameter name=\"code\">print('b')</parameter>\n</invoke>\n"
+               "</minimax:tool_call>")
+            .tools({ python_tool })
+            .parallel_tool_calls(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "", "",
+                {
+                    { "python", R"x({"code": "print('a')"})x" },
+                    { "python", R"x({"code": "print('b')"})x" },
+                }))
+            .run();
     }
 
     // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format