From 02ec17c024e3aeeb019dd08581a4b4af2b316085 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 29 Apr 2026 01:20:56 -0700 Subject: [PATCH 01/27] Support dynamic input for Topk op --- src/include/migraphx/dyn_output.hpp | 2 +- src/include/migraphx/op/topk.hpp | 40 +++++++--- src/onnx/parse_topk.cpp | 41 +++++++--- src/rewrite_topk.cpp | 2 + src/targets/gpu/compile_ops.cpp | 15 ++-- src/targets/gpu/include/migraphx/gpu/hip.hpp | 3 + src/targets/gpu/lowering.cpp | 4 +- test/ref/topk.cpp | 80 ++++++++++++++++++++ test/verify/test_topk_dynamic.cpp | 57 ++++++++++++++ 9 files changed, 213 insertions(+), 31 deletions(-) create mode 100644 test/verify/test_topk_dynamic.cpp diff --git a/src/include/migraphx/dyn_output.hpp b/src/include/migraphx/dyn_output.hpp index ac3263cde3b..2963751b038 100644 --- a/src/include/migraphx/dyn_output.hpp +++ b/src/include/migraphx/dyn_output.hpp @@ -52,7 +52,7 @@ struct compute_output_shape operator dyn_output() const { return ins_inputs([](const auto& x, shape ins_shape, const std::vector& inputs) { - if(ins_shape.dynamic()) + if(ins_shape.any_of_dynamic()) //some op returns a tuple shape e.g. TopK return dyn_output{ins_shape, compute_shape(x, to_shapes(inputs))}; return dyn_output{ins_shape, ins_shape}; }); diff --git a/src/include/migraphx/op/topk.hpp b/src/include/migraphx/op/topk.hpp index 5ff9393e24c..f9a53c99ebd 100644 --- a/src/include/migraphx/op/topk.hpp +++ b/src/include/migraphx/op/topk.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -60,16 +61,31 @@ struct topk shape normalize_compute_shape(std::vector inputs) const { - check_shapes{inputs, *this}.has(1, 2); - auto lens = inputs.at(0).lens(); + check_shapes{inputs, *this, true}.has(1, 2); auto type = inputs.at(0).type(); - lens[axis] = k; + if(inputs.at(0).dynamic()) + { + auto dyn_dims = inputs.at(0).dyn_dims(); + auto max_lens_vec = inputs.at(0).max_lens(); + auto kk = std::min(static_cast(k), max_lens_vec[axis]); + dyn_dims[axis] = {kk, kk}; - shape s_val{type, lens}; - shape s_ind{shape::int64_type, lens}; + shape s_val{type, dyn_dims}; + shape s_ind{shape::int64_type, dyn_dims}; + return shape({s_val, s_ind}); + } + else + { + auto lens = inputs.at(0).lens(); + // Clamp k to input size: k may be a placeholder (max dim) from parse time + auto kk = std::min(static_cast(k), lens[axis]); + lens[axis] = kk; - return shape({s_val, s_ind}); + shape s_val{type, lens}; + shape s_ind{shape::int64_type, lens}; + return shape({s_val, s_ind}); + } } template @@ -84,13 +100,15 @@ struct topk }; } - argument compute(const shape& output_shape, std::vector args) const + argument compute(const dyn_output& dyn_out, std::vector args) const { + const auto& output_shape = dyn_out.computed_shape; const auto& vec_ss = output_shape.sub_shapes(); argument res_val{vec_ss.front()}; argument res_ind{vec_ss.back()}; auto in_val = args.front(); auto relements = in_val.get_shape().lens()[axis]; + auto actual_k = std::min(static_cast(k), relements); auto make_indices = [&](const auto& m_idx) { return [&](int64_t i) { if(args.size() < 2) @@ -118,20 +136,20 @@ struct topk }); if(this->largest) std::partial_sort(data.begin(), - data.begin() + k, + data.begin() + actual_k, data.end(), compare_pair(std::greater<>{})); else std::partial_sort(data.begin(), - data.begin() + k, + data.begin() + actual_k, data.end(), compare_pair(std::less<>{})); std::transform(data.begin(), - data.begin() + this->k, + data.begin() + actual_k, y.begin(), [](const auto& p) { return p.first; }); std::transform(data.begin(), - data.begin() + this->k, + data.begin() + actual_k, y_ind.begin(), [](const auto& p) { return p.second; }); }); diff --git a/src/onnx/parse_topk.cpp b/src/onnx/parse_topk.cpp index 66ab9f7ad95..26027d04352 100644 --- a/src/onnx/parse_topk.cpp +++ b/src/onnx/parse_topk.cpp @@ -40,18 +40,6 @@ struct parse_topk : op_parser onnx_parser::node_info info, std::vector args) const { - int64_t k = 0; - if(args.size() == 2) - { - auto arg_k = args.at(1)->eval(); - check_arg_empty(arg_k, "PARSE_TopK: k input must be constant"); - k = arg_k.at(); - } - else if(contains(info.attributes, "k")) - { - k = info.attributes.at("k").i(); - } - bool largest = true; if(contains(info.attributes, "largest")) { @@ -64,6 +52,35 @@ struct parse_topk : op_parser axis = parser.parse_value(info.attributes.at("axis")).at(); } + int64_t k = 0; + if(args.size() == 2) + { + auto arg_k = args.at(1)->eval(); + if(arg_k.empty()) + { + // k is not constant: use the input dimension along the topk axis + auto input_shape = args.at(0)->get_shape(); + auto ndim = input_shape.ndim(); + auto norm_axis = axis < 0 ? axis + static_cast(ndim) : axis; + if(input_shape.dynamic()) + { + k = input_shape.dyn_dims().at(norm_axis).get_interval().max; + } + else + { + k = input_shape.lens().at(norm_axis); + } + } + else + { + k = arg_k.at(); + } + } + else if(contains(info.attributes, "k")) + { + k = info.attributes.at("k").i(); + } + auto topk_ret = info.add_instruction( make_op("topk", {{"k", k}, {"axis", axis}, {"largest", largest}}), args.at(0)); diff --git a/src/rewrite_topk.cpp b/src/rewrite_topk.cpp index 19411680db8..f4abd88972a 100644 --- a/src/rewrite_topk.cpp +++ b/src/rewrite_topk.cpp @@ -43,6 +43,8 @@ struct find_large_topk { auto ins = r.result; auto input = ins->inputs().front(); + if(input->get_shape().dynamic()) + return; auto op = ins->get_operator().to_value(); auto axis = op["axis"].to(); auto k = op["k"].to(); diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 52272b1d7af..49c9e86e6ff 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -159,11 +159,16 @@ struct dynamic_code_object_op return results.front(); } - if(output_arg.get_shape().dynamic()) - { - auto out_shape = pre_op.compute_shape(to_shapes(static_args), module_args); - static_args[static_args.size() - 1] = output_arg.reshape(out_shape); - } + + // dynamic_code_object_op::compute is never called for static instructions. + // Always recompute the output shape from actual (static) input shapes. + // The pre-allocated output buffer may have been sized for the max dynamic + // dimensions (e.g. TopK with k=100 placeholder) while the actual output + // is smaller (e.g. k=10 when input n=10). Reshaping the buffer to the + // computed shape ensures the compiled kernel sees the correct dimensions. + auto out_shape = pre_op.compute_shape(to_shapes(static_args), module_args); + static_args[static_args.size() - 1] = output_arg.reshape(out_shape); + // Rewrite submodule without dynamic shapes to be used as the IR for compilation module static_submod; diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index d04e81e218c..7dd8c503864 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace migraphx { @@ -258,6 +259,8 @@ struct hip_allocate_memory argument a = allocate_gpu(s); store_preallocated_param(ctx, id, a); } + // This scratch buffers need to be use in runtime JIT + lifetime get_lifetime() const { return lifetime::global; } }; struct hip_copy_literal diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index 5eca58aaa13..e5547e5216e 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -239,8 +239,8 @@ struct miopen_apply instruction_ref insert_dynamic_code_object_op(instruction_ref ins) const { assert(ins->get_operator().name() == "gpu::precompile_op"); - - if(not ins->get_shape().dynamic()) + //some op returns a tuple shape e.g. TopK + if(not ins->get_shape().any_of_dynamic()) return ins; return mod->replace_instruction( diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 5e2ea0e0246..5e77029bd38 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -145,3 +145,83 @@ TEST_CASE(topk_smallest_custom_indices) std::vector gold_ind = {11, 13, 15, 14, 7, 9, 6, 10, 2, 5, 1, 3}; EXPECT(results.second == gold_ind); } + +// Test k > n with dynamic shapes: k=100 placeholder but runtime input has 10 elements +TEST_CASE(topk_k_greater_than_n_dynamic) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + // Dynamic shape: axis 0 ranges from 1 to 100 + std::vector dds = {{1, 100}}; + migraphx::shape s{migraphx::shape::float_type, dds}; + auto data = mm->add_parameter("data", s); + // k=100 is the max placeholder from parse time + auto r = mm->add_instruction( + migraphx::make_op("topk", {{"axis", 0}, {"k", 100}, {"largest", 1}}), data); + auto r0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), r); + auto r1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), r); + mm->add_return({r0, r1}); + + p.compile(migraphx::make_target("ref")); + + // Runtime: only 5 elements + std::vector input_data = {3.0f, 1.0f, 4.0f, 1.5f, 2.0f}; + migraphx::shape input_fixed{migraphx::shape::float_type, {5}}; + migraphx::parameter_map pp; + pp["data"] = migraphx::argument(input_fixed, input_data.data()); + auto rets = p.eval(pp); + + std::vector ret_val; + rets.front().visit([&](auto v) { ret_val.assign(v.begin(), v.end()); }); + std::vector ret_ind; + rets.back().visit([&](auto v) { ret_ind.assign(v.begin(), v.end()); }); + + // k=100 clamped to n=5, sorted descending + EXPECT(ret_val.size() == 5u); + std::vector gold_val = {4.0f, 3.0f, 2.0f, 1.5f, 1.0f}; + EXPECT(ret_val == gold_val); + std::vector gold_ind = {2, 0, 4, 3, 1}; + EXPECT(ret_ind == gold_ind); +} + +// Test k == n: k equals the axis dimension, should return all elements sorted +TEST_CASE(topk_k_equals_n) +{ + migraphx::program p; + auto* mm = p.get_main_module(); + migraphx::shape s{migraphx::shape::float_type, {3, 5}}; + auto data = mm->add_parameter("data", s); + // k=5 equals axis=1 dimension of 5 + auto r = mm->add_instruction( + migraphx::make_op("topk", {{"axis", 1}, {"k", 5}, {"largest", 0}}), data); + auto r0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), r); + auto r1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), r); + mm->add_return({r0, r1}); + + p.compile(migraphx::make_target("ref")); + + std::vector input_data = { + 2.1, 2.3, 2.0, 2.5, 1.9, + 3.3, 0.2, 4.5, 0.1, 0.8, + 1.0, 4.5, 2.1, 0.8, 1.5, + }; + migraphx::parameter_map pp; + pp["data"] = migraphx::argument(s, input_data.data()); + auto rets = p.eval(pp); + + std::vector ret_val; + rets.front().visit([&](auto v) { ret_val.assign(v.begin(), v.end()); }); + std::vector ret_ind; + rets.back().visit([&](auto v) { ret_ind.assign(v.begin(), v.end()); }); + + // All 5 elements returned per row, sorted ascending (smallest first) + EXPECT(ret_val.size() == 15u); + std::vector gold_val = {1.9, 2.0, 2.1, 2.3, 2.5, + 0.1, 0.2, 0.8, 3.3, 4.5, + 0.8, 1.0, 1.5, 2.1, 4.5}; + EXPECT(ret_val == gold_val); + std::vector gold_ind = {4, 2, 0, 1, 3, + 3, 1, 4, 0, 2, + 3, 0, 4, 2, 1}; + EXPECT(ret_ind == gold_ind); +} diff --git a/test/verify/test_topk_dynamic.cpp b/test/verify/test_topk_dynamic.cpp new file mode 100644 index 00000000000..13e8677929a --- /dev/null +++ b/test/verify/test_topk_dynamic.cpp @@ -0,0 +1,57 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "verify_program.hpp" +#include +#include +#include + +// Test k > n with dynamic shapes: k=100 placeholder but runtime input has fewer elements +template +struct test_topk_dynamic + : verify_program> +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + std::vector dds = {{1, 100}}; + migraphx::shape s{migraphx::shape::float_type, dds}; + auto data = mm->add_parameter("data", s); + auto r = mm->add_instruction( + migraphx::make_op("topk", {{"axis", 0}, {"k", 100}, {"largest", 1}}), data); + auto r0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), r); + auto r1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), r); + mm->add_return({r0, r1}); + return p; + } + + std::unordered_map get_test_dims() const + { + return {{"data", migraphx::shape{migraphx::shape::float_type, {N}}}}; + } +}; + +template struct test_topk_dynamic<10>; + From b99845746a5e63ff3c4a8d998b48f469e8fb4281 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Mon, 11 May 2026 20:00:24 -0700 Subject: [PATCH 02/27] fix debug version print and use internal allocation for dynamic output --- src/program.cpp | 8 +++++++- src/replace_allocate.cpp | 6 +++++- src/targets/gpu/compile_ops.cpp | 8 +------- src/targets/gpu/include/migraphx/gpu/hip.hpp | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/program.cpp b/src/program.cpp index 617a215f361..a77e8cc20d2 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -671,7 +671,13 @@ std::vector program::eval(const parameter_map& params, if(trace_level > 0) { ctx.finish(); - std::cout << "Run instruction: " << ins_out.at(ins) << std::endl; + //The ins_out map is populated from the main module's + //but when dynamic_code_object_op::compute recursively calls generic_eval + //on its runtime sub-module ins_out don't have it. + if(ins_out.find(ins) != ins_out.end()) + std::cout << "Run instruction: " << ins_out.at(ins) << std::endl; + else + std::cout << "Run instruction: " << ins->name() << " (submodule)" << std::endl; } timer t{}; auto result = f(); diff --git a/src/replace_allocate.cpp b/src/replace_allocate.cpp index a8573f94ea0..fa5f0b6bc0c 100644 --- a/src/replace_allocate.cpp +++ b/src/replace_allocate.cpp @@ -185,7 +185,11 @@ void replace_allocate::apply(module_pass_manager& mpm) const continue; auto s = ins->get_shape(); - if(not root_offload_copy and model.needs_out_params() and contains(mod_output_names, ins)) + // Dynamic outputs should use internal allocation (hip::allocate) rather than + // becoming output parameters. This lets dynamic_code_object_op::compute() + // reshape the buffer to the actual runtime dimensions after execution. + if(not root_offload_copy and model.needs_out_params() and contains(mod_output_names, ins) and + not s.any_of_dynamic()) { auto out_param = m.add_parameter(mod_output_names[ins], s); if(contains(mod_output_debug_symbols, ins)) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 49c9e86e6ff..cb7c5cb920d 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -159,13 +159,7 @@ struct dynamic_code_object_op return results.front(); } - - // dynamic_code_object_op::compute is never called for static instructions. - // Always recompute the output shape from actual (static) input shapes. - // The pre-allocated output buffer may have been sized for the max dynamic - // dimensions (e.g. TopK with k=100 placeholder) while the actual output - // is smaller (e.g. k=10 when input n=10). Reshaping the buffer to the - // computed shape ensures the compiled kernel sees the correct dimensions. + //static shape code can't be here, remove the check. auto out_shape = pre_op.compute_shape(to_shapes(static_args), module_args); static_args[static_args.size() - 1] = output_arg.reshape(out_shape); diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index 7dd8c503864..e1408c5cdf2 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -259,7 +259,7 @@ struct hip_allocate_memory argument a = allocate_gpu(s); store_preallocated_param(ctx, id, a); } - // This scratch buffers need to be use in runtime JIT + // This scratch buffers need to be use in runtime JIT. Some op returns tuple output e.g topk it need global lifetime buffer to avoid dangling. lifetime get_lifetime() const { return lifetime::global; } }; From 3527d11c6743f2f00ae3ef9927591fd562479c7d Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Tue, 12 May 2026 02:07:37 -0700 Subject: [PATCH 03/27] add bound check --- src/targets/gpu/compile_ops.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index cb7c5cb920d..089f538bcd7 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -162,6 +162,9 @@ struct dynamic_code_object_op //static shape code can't be here, remove the check. auto out_shape = pre_op.compute_shape(to_shapes(static_args), module_args); static_args[static_args.size() - 1] = output_arg.reshape(out_shape); + // Skip JIT compilation when dynamic shape resolves to 0 elements at runtime + if(args.front().get_shape().elements() == 0) + return static_args.back(); // Rewrite submodule without dynamic shapes to be used as the IR for compilation From 171aec90ec5567f39d92321e5c4dc55ff29bf1b4 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:17:02 +0800 Subject: [PATCH 04/27] Update src/include/migraphx/op/topk.hpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/include/migraphx/op/topk.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/op/topk.hpp b/src/include/migraphx/op/topk.hpp index f9a53c99ebd..894c944acd5 100644 --- a/src/include/migraphx/op/topk.hpp +++ b/src/include/migraphx/op/topk.hpp @@ -77,7 +77,7 @@ struct topk } else { - auto lens = inputs.at(0).lens(); + auto lens = inputs.at(0).lens(); // Clamp k to input size: k may be a placeholder (max dim) from parse time auto kk = std::min(static_cast(k), lens[axis]); lens[axis] = kk; From 222a2dd20947c0af07f30531f7aaac018298e851 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:17:13 +0800 Subject: [PATCH 05/27] Update src/include/migraphx/dyn_output.hpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/include/migraphx/dyn_output.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/dyn_output.hpp b/src/include/migraphx/dyn_output.hpp index 2963751b038..fe8a636d7cb 100644 --- a/src/include/migraphx/dyn_output.hpp +++ b/src/include/migraphx/dyn_output.hpp @@ -52,7 +52,7 @@ struct compute_output_shape operator dyn_output() const { return ins_inputs([](const auto& x, shape ins_shape, const std::vector& inputs) { - if(ins_shape.any_of_dynamic()) //some op returns a tuple shape e.g. TopK + if(ins_shape.any_of_dynamic()) // some op returns a tuple shape e.g. TopK return dyn_output{ins_shape, compute_shape(x, to_shapes(inputs))}; return dyn_output{ins_shape, ins_shape}; }); From 33398d8bfdcb2820907f0dcad97c09f05290ff31 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:17:28 +0800 Subject: [PATCH 06/27] Update src/program.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/program.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/program.cpp b/src/program.cpp index a77e8cc20d2..41889d7a1e0 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -671,9 +671,9 @@ std::vector program::eval(const parameter_map& params, if(trace_level > 0) { ctx.finish(); - //The ins_out map is populated from the main module's - //but when dynamic_code_object_op::compute recursively calls generic_eval - //on its runtime sub-module ins_out don't have it. + // The ins_out map is populated from the main module's + // but when dynamic_code_object_op::compute recursively calls generic_eval + // on its runtime sub-module ins_out don't have it. if(ins_out.find(ins) != ins_out.end()) std::cout << "Run instruction: " << ins_out.at(ins) << std::endl; else From b067618d4ea65e876e342780d3e0c218e3a391af Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:18:00 +0800 Subject: [PATCH 07/27] Update src/replace_allocate.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/replace_allocate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/replace_allocate.cpp b/src/replace_allocate.cpp index fa5f0b6bc0c..8a7098eeb05 100644 --- a/src/replace_allocate.cpp +++ b/src/replace_allocate.cpp @@ -188,8 +188,8 @@ void replace_allocate::apply(module_pass_manager& mpm) const // Dynamic outputs should use internal allocation (hip::allocate) rather than // becoming output parameters. This lets dynamic_code_object_op::compute() // reshape the buffer to the actual runtime dimensions after execution. - if(not root_offload_copy and model.needs_out_params() and contains(mod_output_names, ins) and - not s.any_of_dynamic()) + if(not root_offload_copy and model.needs_out_params() and + contains(mod_output_names, ins) and not s.any_of_dynamic()) { auto out_param = m.add_parameter(mod_output_names[ins], s); if(contains(mod_output_debug_symbols, ins)) From 36ad89d3c454d87bc9dca0527783510053a3cb7c Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:18:11 +0800 Subject: [PATCH 08/27] Update src/targets/gpu/compile_ops.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/targets/gpu/compile_ops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 089f538bcd7..11afc741f11 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -159,7 +159,7 @@ struct dynamic_code_object_op return results.front(); } - //static shape code can't be here, remove the check. + // static shape code can't be here, remove the check. auto out_shape = pre_op.compute_shape(to_shapes(static_args), module_args); static_args[static_args.size() - 1] = output_arg.reshape(out_shape); // Skip JIT compilation when dynamic shape resolves to 0 elements at runtime From b55ae071aedec53f05b326021d130fc05991fca0 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:18:35 +0800 Subject: [PATCH 09/27] Update test/verify/test_topk_dynamic.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/verify/test_topk_dynamic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/verify/test_topk_dynamic.cpp b/test/verify/test_topk_dynamic.cpp index 13e8677929a..abf8d23c04e 100644 --- a/test/verify/test_topk_dynamic.cpp +++ b/test/verify/test_topk_dynamic.cpp @@ -54,4 +54,3 @@ struct test_topk_dynamic }; template struct test_topk_dynamic<10>; - From df26e31833d54a03a9fbe0ba1283b7cde2c9a52d Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 15:19:12 +0800 Subject: [PATCH 10/27] Update test/verify/test_topk_dynamic.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/verify/test_topk_dynamic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/test_topk_dynamic.cpp b/test/verify/test_topk_dynamic.cpp index abf8d23c04e..c99e5f55561 100644 --- a/test/verify/test_topk_dynamic.cpp +++ b/test/verify/test_topk_dynamic.cpp @@ -35,7 +35,7 @@ struct test_topk_dynamic migraphx::program create_program() const { migraphx::program p; - auto* mm = p.get_main_module(); + auto* mm = p.get_main_module(); std::vector dds = {{1, 100}}; migraphx::shape s{migraphx::shape::float_type, dds}; auto data = mm->add_parameter("data", s); From 89f31badf3a43d7bdc0b17eb7117efb86822b93e Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 17:33:10 +0800 Subject: [PATCH 11/27] Update src/targets/gpu/compile_ops.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/targets/gpu/compile_ops.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 11afc741f11..83f1f84acf1 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -165,7 +165,6 @@ struct dynamic_code_object_op // Skip JIT compilation when dynamic shape resolves to 0 elements at runtime if(args.front().get_shape().elements() == 0) return static_args.back(); - // Rewrite submodule without dynamic shapes to be used as the IR for compilation module static_submod; From 2624cecba440601badbea0df31aed4aed1522f86 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 17:33:21 +0800 Subject: [PATCH 12/27] Update src/targets/gpu/include/migraphx/gpu/hip.hpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/targets/gpu/include/migraphx/gpu/hip.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index e1408c5cdf2..b68a415da3d 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -259,7 +259,8 @@ struct hip_allocate_memory argument a = allocate_gpu(s); store_preallocated_param(ctx, id, a); } - // This scratch buffers need to be use in runtime JIT. Some op returns tuple output e.g topk it need global lifetime buffer to avoid dangling. + // This scratch buffers need to be use in runtime JIT. Some op returns tuple output e.g topk it + // need global lifetime buffer to avoid dangling. lifetime get_lifetime() const { return lifetime::global; } }; From bf9e4e4041d0dad5e874ef6cf34bb19fc91616d0 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 23:42:21 -0700 Subject: [PATCH 13/27] remove lifetime::global and s.any_of_dynamic check --- src/include/migraphx/dyn_output.hpp | 2 +- src/onnx/parse_topk.cpp | 2 +- src/replace_allocate.cpp | 6 +----- src/targets/gpu/include/migraphx/gpu/hip.hpp | 1 - test/ref/topk.cpp | 2 +- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/include/migraphx/dyn_output.hpp b/src/include/migraphx/dyn_output.hpp index fe8a636d7cb..8e4b7f1d529 100644 --- a/src/include/migraphx/dyn_output.hpp +++ b/src/include/migraphx/dyn_output.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/onnx/parse_topk.cpp b/src/onnx/parse_topk.cpp index 26027d04352..81853857136 100644 --- a/src/onnx/parse_topk.cpp +++ b/src/onnx/parse_topk.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/replace_allocate.cpp b/src/replace_allocate.cpp index 8a7098eeb05..a8573f94ea0 100644 --- a/src/replace_allocate.cpp +++ b/src/replace_allocate.cpp @@ -185,11 +185,7 @@ void replace_allocate::apply(module_pass_manager& mpm) const continue; auto s = ins->get_shape(); - // Dynamic outputs should use internal allocation (hip::allocate) rather than - // becoming output parameters. This lets dynamic_code_object_op::compute() - // reshape the buffer to the actual runtime dimensions after execution. - if(not root_offload_copy and model.needs_out_params() and - contains(mod_output_names, ins) and not s.any_of_dynamic()) + if(not root_offload_copy and model.needs_out_params() and contains(mod_output_names, ins)) { auto out_param = m.add_parameter(mod_output_names[ins], s); if(contains(mod_output_debug_symbols, ins)) diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index b68a415da3d..1000777abb2 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -253,7 +253,6 @@ struct hip_allocate_memory { return get_preallocation(ctx, id); } - void finalize(context& ctx, const shape&, const std::vector&) const { argument a = allocate_gpu(s); diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 5e77029bd38..d59dcf1b34a 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal From e4073bd07fdcce8f0541c62bc528e4fa9068a62a Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Wed, 13 May 2026 23:43:16 -0700 Subject: [PATCH 14/27] fix license check --- test/verify/test_topk_dynamic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/test_topk_dynamic.cpp b/test/verify/test_topk_dynamic.cpp index c99e5f55561..9258c344423 100644 --- a/test/verify/test_topk_dynamic.cpp +++ b/test/verify/test_topk_dynamic.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal From eedd15f73c310991c09a71fea0e31d07dfb5f05a Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 00:39:23 -0700 Subject: [PATCH 15/27] remove lifetime::global --- src/targets/gpu/include/migraphx/gpu/hip.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index 1000777abb2..c1134e13b5c 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -30,7 +30,6 @@ #include #include #include -#include #include namespace migraphx { @@ -258,9 +257,6 @@ struct hip_allocate_memory argument a = allocate_gpu(s); store_preallocated_param(ctx, id, a); } - // This scratch buffers need to be use in runtime JIT. Some op returns tuple output e.g topk it - // need global lifetime buffer to avoid dangling. - lifetime get_lifetime() const { return lifetime::global; } }; struct hip_copy_literal From 49b757b6f9844207f0e0e9d05ca301edf621bb1a Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 00:43:12 -0700 Subject: [PATCH 16/27] format change --- src/targets/gpu/include/migraphx/gpu/hip.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index c1134e13b5c..3e2113871b4 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -252,6 +252,7 @@ struct hip_allocate_memory { return get_preallocation(ctx, id); } + void finalize(context& ctx, const shape&, const std::vector&) const { argument a = allocate_gpu(s); From 3499f43df32996df68123763a986bd705cb6bf2c Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:54:22 +0800 Subject: [PATCH 17/27] Update test/ref/topk.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/ref/topk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index d59dcf1b34a..ef37a592c6c 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -156,7 +156,7 @@ TEST_CASE(topk_k_greater_than_n_dynamic) migraphx::shape s{migraphx::shape::float_type, dds}; auto data = mm->add_parameter("data", s); // k=100 is the max placeholder from parse time - auto r = mm->add_instruction( + auto r = mm->add_instruction( migraphx::make_op("topk", {{"axis", 0}, {"k", 100}, {"largest", 1}}), data); auto r0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), r); auto r1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), r); From d5d0d3ee3a3c28567867f6e87897dc4ff7283099 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:54:36 +0800 Subject: [PATCH 18/27] Update src/targets/gpu/lowering.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/targets/gpu/lowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/targets/gpu/lowering.cpp b/src/targets/gpu/lowering.cpp index e5547e5216e..f19f2716226 100644 --- a/src/targets/gpu/lowering.cpp +++ b/src/targets/gpu/lowering.cpp @@ -239,7 +239,7 @@ struct miopen_apply instruction_ref insert_dynamic_code_object_op(instruction_ref ins) const { assert(ins->get_operator().name() == "gpu::precompile_op"); - //some op returns a tuple shape e.g. TopK + // some op returns a tuple shape e.g. TopK if(not ins->get_shape().any_of_dynamic()) return ins; From 68332ccae373f1f7b051eab10215c1132b7aeb3d Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:54:57 +0800 Subject: [PATCH 19/27] Update test/ref/topk.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/ref/topk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index ef37a592c6c..2218f4bcab1 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -192,8 +192,8 @@ TEST_CASE(topk_k_equals_n) migraphx::shape s{migraphx::shape::float_type, {3, 5}}; auto data = mm->add_parameter("data", s); // k=5 equals axis=1 dimension of 5 - auto r = mm->add_instruction( - migraphx::make_op("topk", {{"axis", 1}, {"k", 5}, {"largest", 0}}), data); + auto r = mm->add_instruction(migraphx::make_op("topk", {{"axis", 1}, {"k", 5}, {"largest", 0}}), + data); auto r0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), r); auto r1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), r); mm->add_return({r0, r1}); From 0ee21eca3060a6849f389d947a33bbb3a4f8a939 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:55:10 +0800 Subject: [PATCH 20/27] Update test/ref/topk.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/ref/topk.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 2218f4bcab1..839d22d565b 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -201,9 +201,21 @@ TEST_CASE(topk_k_equals_n) p.compile(migraphx::make_target("ref")); std::vector input_data = { - 2.1, 2.3, 2.0, 2.5, 1.9, - 3.3, 0.2, 4.5, 0.1, 0.8, - 1.0, 4.5, 2.1, 0.8, 1.5, + 2.1, + 2.3, + 2.0, + 2.5, + 1.9, + 3.3, + 0.2, + 4.5, + 0.1, + 0.8, + 1.0, + 4.5, + 2.1, + 0.8, + 1.5, }; migraphx::parameter_map pp; pp["data"] = migraphx::argument(s, input_data.data()); From 3431d5725e661038b30cf186a102421d1ecd20da Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:56:39 +0800 Subject: [PATCH 21/27] Update test/ref/topk.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/ref/topk.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 839d22d565b..9f00f60fe24 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -228,9 +228,8 @@ TEST_CASE(topk_k_equals_n) // All 5 elements returned per row, sorted ascending (smallest first) EXPECT(ret_val.size() == 15u); - std::vector gold_val = {1.9, 2.0, 2.1, 2.3, 2.5, - 0.1, 0.2, 0.8, 3.3, 4.5, - 0.8, 1.0, 1.5, 2.1, 4.5}; + std::vector gold_val = { + 1.9, 2.0, 2.1, 2.3, 2.5, 0.1, 0.2, 0.8, 3.3, 4.5, 0.8, 1.0, 1.5, 2.1, 4.5}; EXPECT(ret_val == gold_val); std::vector gold_ind = {4, 2, 0, 1, 3, 3, 1, 4, 0, 2, From c360580755eb976ec7fab9fdb926cf20099c7c81 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:56:52 +0800 Subject: [PATCH 22/27] Update test/ref/topk.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/ref/topk.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 9f00f60fe24..8b24de0cad3 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -231,8 +231,6 @@ TEST_CASE(topk_k_equals_n) std::vector gold_val = { 1.9, 2.0, 2.1, 2.3, 2.5, 0.1, 0.2, 0.8, 3.3, 4.5, 0.8, 1.0, 1.5, 2.1, 4.5}; EXPECT(ret_val == gold_val); - std::vector gold_ind = {4, 2, 0, 1, 3, - 3, 1, 4, 0, 2, - 3, 0, 4, 2, 1}; + std::vector gold_ind = {4, 2, 0, 1, 3, 3, 1, 4, 0, 2, 3, 0, 4, 2, 1}; EXPECT(ret_ind == gold_ind); } From e6090e39bde7750318e2a1d09b00b0757677532a Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:57:08 +0800 Subject: [PATCH 23/27] Update test/verify/test_topk_dynamic.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/verify/test_topk_dynamic.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/verify/test_topk_dynamic.cpp b/test/verify/test_topk_dynamic.cpp index 9258c344423..89df61959fd 100644 --- a/test/verify/test_topk_dynamic.cpp +++ b/test/verify/test_topk_dynamic.cpp @@ -29,8 +29,7 @@ // Test k > n with dynamic shapes: k=100 placeholder but runtime input has fewer elements template -struct test_topk_dynamic - : verify_program> +struct test_topk_dynamic : verify_program> { migraphx::program create_program() const { From 688864a65afbdded3105340e6d374a6443852715 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Thu, 14 May 2026 19:57:22 +0800 Subject: [PATCH 24/27] Update src/targets/gpu/include/migraphx/gpu/hip.hpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/targets/gpu/include/migraphx/gpu/hip.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/targets/gpu/include/migraphx/gpu/hip.hpp b/src/targets/gpu/include/migraphx/gpu/hip.hpp index 3e2113871b4..c1134e13b5c 100644 --- a/src/targets/gpu/include/migraphx/gpu/hip.hpp +++ b/src/targets/gpu/include/migraphx/gpu/hip.hpp @@ -252,7 +252,6 @@ struct hip_allocate_memory { return get_preallocation(ctx, id); } - void finalize(context& ctx, const shape&, const std::vector&) const { argument a = allocate_gpu(s); From a048213258b8b3ee866c79b715f167793d49c16d Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Sat, 16 May 2026 09:32:14 +0800 Subject: [PATCH 25/27] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/include/migraphx/op/topk.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/include/migraphx/op/topk.hpp b/src/include/migraphx/op/topk.hpp index 894c944acd5..3d72a2e0c66 100644 --- a/src/include/migraphx/op/topk.hpp +++ b/src/include/migraphx/op/topk.hpp @@ -67,9 +67,11 @@ struct topk if(inputs.at(0).dynamic()) { auto dyn_dims = inputs.at(0).dyn_dims(); + auto min_lens_vec = inputs.at(0).min_lens(); auto max_lens_vec = inputs.at(0).max_lens(); - auto kk = std::min(static_cast(k), max_lens_vec[axis]); - dyn_dims[axis] = {kk, kk}; + auto min_kk = std::min(static_cast(k), min_lens_vec[axis]); + auto max_kk = std::min(static_cast(k), max_lens_vec[axis]); + dyn_dims[axis] = {min_kk, max_kk}; shape s_val{type, dyn_dims}; shape s_ind{shape::int64_type, dyn_dims}; From 0d08c501459b95dec0bb6e4e6265f0d4af507368 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Sat, 16 May 2026 09:33:44 +0800 Subject: [PATCH 26/27] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- test/ref/topk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ref/topk.cpp b/test/ref/topk.cpp index 8b24de0cad3..3f71f4f8879 100644 --- a/test/ref/topk.cpp +++ b/test/ref/topk.cpp @@ -146,7 +146,7 @@ TEST_CASE(topk_smallest_custom_indices) EXPECT(results.second == gold_ind); } -// Test k > n with dynamic shapes: k=100 placeholder but runtime input has 10 elements +// Test k > n with dynamic shapes: k=100 placeholder but runtime input has 5 elements TEST_CASE(topk_k_greater_than_n_dynamic) { migraphx::program p; From 040e186fb9b4e8075011c5414b0ee7f53e7c4549 Mon Sep 17 00:00:00 2001 From: kazhang2 Date: Mon, 1 Jun 2026 14:10:05 +0800 Subject: [PATCH 27/27] Change topk op k from int64_t to std::optional to explicitly represent unknown k instead of using a placeholder value --- src/include/migraphx/op/topk.hpp | 26 ++++++++++++++++---------- src/onnx/parse_topk.cpp | 29 ++++++++++------------------- src/rewrite_topk.cpp | 9 +++++---- src/targets/gpu/topk.cpp | 5 +++-- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/src/include/migraphx/op/topk.hpp b/src/include/migraphx/op/topk.hpp index 3d72a2e0c66..254df980c18 100644 --- a/src/include/migraphx/op/topk.hpp +++ b/src/include/migraphx/op/topk.hpp @@ -33,6 +33,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -40,7 +41,7 @@ namespace op { struct topk { - int64_t k = 1; + std::optional k; int64_t axis = 0; bool largest = true; @@ -67,11 +68,14 @@ struct topk if(inputs.at(0).dynamic()) { auto dyn_dims = inputs.at(0).dyn_dims(); - auto min_lens_vec = inputs.at(0).min_lens(); - auto max_lens_vec = inputs.at(0).max_lens(); - auto min_kk = std::min(static_cast(k), min_lens_vec[axis]); - auto max_kk = std::min(static_cast(k), max_lens_vec[axis]); - dyn_dims[axis] = {min_kk, max_kk}; + if(k.has_value()) + { + auto min_lens_vec = inputs.at(0).min_lens(); + auto max_lens_vec = inputs.at(0).max_lens(); + auto min_kk = std::min(static_cast(*k), min_lens_vec[axis]); + auto max_kk = std::min(static_cast(*k), max_lens_vec[axis]); + dyn_dims[axis] = {min_kk, max_kk}; + } shape s_val{type, dyn_dims}; shape s_ind{shape::int64_type, dyn_dims}; @@ -80,9 +84,11 @@ struct topk else { auto lens = inputs.at(0).lens(); - // Clamp k to input size: k may be a placeholder (max dim) from parse time - auto kk = std::min(static_cast(k), lens[axis]); - lens[axis] = kk; + if(k.has_value()) + { + auto kk = std::min(static_cast(*k), lens[axis]); + lens[axis] = kk; + } shape s_val{type, lens}; shape s_ind{shape::int64_type, lens}; @@ -110,7 +116,7 @@ struct topk argument res_ind{vec_ss.back()}; auto in_val = args.front(); auto relements = in_val.get_shape().lens()[axis]; - auto actual_k = std::min(static_cast(k), relements); + auto actual_k = k.has_value() ? std::min(static_cast(*k), relements) : relements; auto make_indices = [&](const auto& m_idx) { return [&](int64_t i) { if(args.size() < 2) diff --git a/src/onnx/parse_topk.cpp b/src/onnx/parse_topk.cpp index 81853857136..9d181928490 100644 --- a/src/onnx/parse_topk.cpp +++ b/src/onnx/parse_topk.cpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -52,26 +53,11 @@ struct parse_topk : op_parser axis = parser.parse_value(info.attributes.at("axis")).at(); } - int64_t k = 0; + std::optional k; if(args.size() == 2) { auto arg_k = args.at(1)->eval(); - if(arg_k.empty()) - { - // k is not constant: use the input dimension along the topk axis - auto input_shape = args.at(0)->get_shape(); - auto ndim = input_shape.ndim(); - auto norm_axis = axis < 0 ? axis + static_cast(ndim) : axis; - if(input_shape.dynamic()) - { - k = input_shape.dyn_dims().at(norm_axis).get_interval().max; - } - else - { - k = input_shape.lens().at(norm_axis); - } - } - else + if(not arg_k.empty()) { k = arg_k.at(); } @@ -81,8 +67,13 @@ struct parse_topk : op_parser k = info.attributes.at("k").i(); } - auto topk_ret = info.add_instruction( - make_op("topk", {{"k", k}, {"axis", axis}, {"largest", largest}}), args.at(0)); + auto topk_ret = + k.has_value() + ? info.add_instruction( + make_op("topk", {{"k", *k}, {"axis", axis}, {"largest", largest}}), + args.at(0)) + : info.add_instruction( + make_op("topk", {{"axis", axis}, {"largest", largest}}), args.at(0)); auto ret_val = info.add_instruction(make_op("get_tuple_elem", {{"index", 0}}), topk_ret); auto ret_ind = info.add_instruction(make_op("get_tuple_elem", {{"index", 1}}), topk_ret); diff --git a/src/rewrite_topk.cpp b/src/rewrite_topk.cpp index f4abd88972a..83c91bc421e 100644 --- a/src/rewrite_topk.cpp +++ b/src/rewrite_topk.cpp @@ -37,19 +37,20 @@ namespace { struct find_large_topk { std::size_t n_threshold = 0; - auto matcher() const { return match::name("topk"); } + auto matcher() const + { + return match::name("topk")(match::arg(0)(match::not_dynamic_shape())); + } void apply(module& m, const match::matcher_result& r) const { auto ins = r.result; auto input = ins->inputs().front(); - if(input->get_shape().dynamic()) - return; auto op = ins->get_operator().to_value(); auto axis = op["axis"].to(); - auto k = op["k"].to(); auto dims = input->get_shape().lens(); auto n = dims.at(axis); + auto k = op["k"].is_null() ? static_cast(n) : op["k"].to(); if(n < n_threshold) return; diff --git a/src/targets/gpu/topk.cpp b/src/targets/gpu/topk.cpp index 2e799c650af..278d05d3a9d 100644 --- a/src/targets/gpu/topk.cpp +++ b/src/targets/gpu/topk.cpp @@ -37,17 +37,18 @@ shape hip_topk::compute_shape(std::vector inputs) const argument hip_topk::compute(context& ctx, const shape&, const std::vector& args) const { auto outputs = args.back().get_sub_objects(); + auto actual_k = op.k.has_value() ? *op.k : static_cast(args[0].get_shape().lens()[op.axis]); return op.largest ? device::topk_largest(ctx.get_stream().get(), outputs.front(), outputs.back(), args[0], - op.k, + actual_k, op.axis) : device::topk_smallest(ctx.get_stream().get(), outputs.front(), outputs.back(), args[0], - op.k, + actual_k, op.axis); }