From a46418fa27a9bfa8fc9e4de80ea446aff286763f Mon Sep 17 00:00:00 2001 From: Jeremiah Blanchard Date: Sun, 19 Apr 2026 04:06:29 +0000 Subject: [PATCH] chat : add MiniMax M2 specialized tool-call handler The autoparser (peg-native) infers a grammar from the MiniMax-M2 template that handles a single invoke element cleanly but mis-specifies the repetition rule for multiple invoke elements inside one tool-call wrapper. Parallel tool calls with the generic path trip the streaming parser's self-consistency check ("Invalid diff: now finding less tool calls!"), which is the test-harness analogue of the production GGML_ABORT at llama-grammar.cpp:1435 on real MiniMax M2.7 output. Add a specialized handler following the Kimi K2 pattern: XML invoke/parameter parsing, lazy grammar gated by the tool-call start tag as trigger, reasoning extraction via the think open/close tags. Dispatch requires three MiniMax-specific literals in the template source (, ...value...... +// - Reasoning: {reasoning} (optional) +static common_chat_params common_chat_params_init_minimax(const common_chat_template & tmpl, + const autoparser::generation_params & inputs) { + common_chat_params data; + + data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.supports_thinking = true; + data.thinking_start_tag = ""; + data.thinking_end_tag = ""; + data.preserved_tokens = { + "", + "", + "", + "", + }; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + const std::string TC_START = ""; + const std::string TC_END = ""; + const std::string THINK_START = ""; + const std::string THINK_END = ""; + + auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) { + auto end = p.end(); + + // Reasoning extraction (same pattern as Kimi K2) + auto reasoning = extract_reasoning ? 
p.optional(THINK_START + p.reasoning( + p.until_one_of({ THINK_END, TC_START })) + + p.optional(p.literal(THINK_END))) : p.eps(); + auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START); + + // Content only parser (no tools) + if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { + return generation_prompt + reasoning + p.content(p.rest()) + end; + } + + // Build tool call parsers for each available function + // MiniMax format: value... + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + + // Build parameter parsers + auto arg_choice = p.choice(); + if (params.contains("properties") && !params["properties"].empty()) { + for (const auto & el : params["properties"].items()) { + const std::string & prop_name = el.key(); + const auto & prop_def = el.value(); + bool is_string_type = (prop_def.contains("type") && prop_def["type"] == "string"); + + // value + auto arg_rule = p.tool_arg( + p.tool_arg_open(p.literal("") + + (is_string_type + ? p.tool_arg_string_value(p.until("")) + : p.tool_arg_value(p.until(""))) + + p.tool_arg_close(p.literal("")) + ); + arg_choice |= arg_rule; + } + } + auto args = p.zero_or_more(p.space() + arg_choice); + + // ...params... + auto tool_parser = p.tool( + p.tool_open( + p.literal("") + ) + + p.tool_args(args) + + p.space() + + p.tool_close(p.literal("")) + ); + + tool_choice |= p.rule("tool-" + name, tool_parser); + }); + + // Tool calls section: \n...tool calls...\n + auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; + auto max_calls = inputs.parallel_tool_calls ? 
-1 : 1; + auto tool_calls = p.rule("tool-calls", + p.trigger_rule("tool-call", + p.literal(TC_START) + p.space() + + p.repeat(tool_choice + p.space(), min_calls, max_calls) + + p.optional(p.literal(TC_END))) + ); + + auto content_before_tools = p.content(p.until_one_of({ TC_START })); + + return generation_prompt + reasoning + content_before_tools + tool_calls + end; + }); + + data.parser = parser.save(); + + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); + }); + parser.build_grammar(builder, data.grammar_lazy); + }); + + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "" } + }; + } + + return data; +} + static json common_chat_extra_context() { json ctx = json::object(); std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); @@ -2116,6 +2240,14 @@ std::optional common_chat_try_specialized_template( return common_chat_params_init_deepseek_v3_2(tmpl, params); } + // MiniMax M2 format detection + if (src.find("") != std::string::npos && + src.find("call:'") != std::string::npos) { if (src.find("{#- OpenAI Chat Completions:") == std::string::npos) { diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 3b8de5ce02e5..692182940b05 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -3448,6 +3448,74 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect(message_assist_call) .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) .run(); + + // Parallel tool calls (two different tools) inside one wrapper + tst.test( + "\n" + "\n1\n\n" + "\nprint('hey')\n\n" + "") + .tools({ special_function_tool, python_tool }) + .parallel_tool_calls(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + 
.expect(message_with_reasoning_content_and_multiple_tool_calls( + "", "", + { + { "special_function", R"x({"arg1": 1})x" }, + { "python", R"x({"code": "print('hey')"})x" }, + })) + .run(); + + // String parameter with embedded XML-ish content + tst.test( + "\n\n" + "
\n" + "
\n
") + .tools({ html_tool }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "html", R"x({"markup": "
"})x")) + .run(); + + // Multi-line string parameter value + tst.test( + "\n\n" + "import os\nfor k in os.environ:\n print(k)\n" + "\n") + .tools({ python_tool }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "python", R"x({"code": "import os\nfor k in os.environ:\n print(k)"})x")) + .run(); + + // Tool with two integer parameters + tst.test( + "\n\n" + "1\n" + "42\n" + "\n") + .tools({ special_function_tool_with_optional_param }) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_tool_calls( + "special_function_with_opt", R"x({"arg1": 1, "arg2": 42})x")) + .run(); + + // Parallel calls to the same tool inside one wrapper + tst.test( + "\n" + "\nprint('a')\n\n" + "\nprint('b')\n\n" + "") + .tools({ python_tool }) + .parallel_tool_calls(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "", "", + { + { "python", R"x({"code": "print('a')"})x" }, + { "python", R"x({"code": "print('b')"})x" }, + })) + .run(); } // NVIDIA-Nemotron-Nano-v2 tests - ... format