Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ jobs:
- name: Run sample tests (py -> pto -> cpp)
shell: bash
env:
CI_EVENT_NAME: ${{ github.event_name }}
WORKFLOW_SOC_VERSION: ${{ github.event.inputs.soc_version || 'Ascend910' }}
PTOAS_BIN: ${{ github.workspace }}/build/tools/ptoas/ptoas
PYTHON_BIN: /usr/bin/python3
MLIR_PYTHON_ROOT: ${{ env.MLIR_PYTHONPATH }}
Expand All @@ -213,7 +215,15 @@ jobs:
export PYTHONPATH="${MLIR_PYTHON_ROOT}:${PTO_PYTHON_ROOT}:${PYTHONPATH:-}"
export LD_LIBRARY_PATH="${LLVM_DIR}/lib:${PTO_INSTALL_DIR}/lib:${LD_LIBRARY_PATH:-}"
export PTOAS_OUT_DIR="${PAYLOAD_DIR}/test/samples"
bash test/samples/runop.sh --enablebc all
if [[ "${CI_EVENT_NAME}" == "workflow_dispatch" || "${CI_EVENT_NAME}" == "schedule" ]]; then
# Board-validation payloads must only contain the arch-matching
# direct .pto samples. Some A3/A5 qwen decode cases intentionally
# share the same testcase basename, so SKIP_CASES cannot
# distinguish them later once the payload is built.
SOC_VERSION="${WORKFLOW_SOC_VERSION}" bash test/samples/runop.sh --enablebc all
else
bash test/samples/runop.sh --enablebc all
fi

- name: Build payload artifact
if: >-
Expand Down Expand Up @@ -294,9 +304,7 @@ jobs:
# RUN_ONLY_CASES requests, so remote validation does not try to force
# A5-only cases through an A3 flow or vice versa.
A3_ONLY_CASES="partition5d,partition5d_dynamic,mrgsort,tmatmulk_autosync"
QWEN3_TILELET_A5_ONLY_CASES="$(printf 'qwen3_decode_layer_incore_%s,' {0..19})"
QWEN3_TILELET_A5_ONLY_CASES="${QWEN3_TILELET_A5_ONLY_CASES%,}"
A5_ONLY_CASES="partition5d_a5,partition5d_dynamic_a5,mrgsort_a5,tmatmulk_autosync_a5,tpack,${QWEN3_TILELET_A5_ONLY_CASES}"
A5_ONLY_CASES="partition5d_a5,partition5d_dynamic_a5,mrgsort_a5,tmatmulk_autosync_a5,tpack"

sv_lc="$(printf '%s' "${SOC_VERSION}" | tr '[:upper:]' '[:lower:]')"
is_a5=0
Expand Down
66 changes: 52 additions & 14 deletions test/npu_validation/scripts/generate_testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,25 +86,56 @@
})

CASE_INT_SCALAR_DEFAULTS = {
"qwen3_decode_layer_incore_13": {
"v7": 64,
"qwen3_decode_incore_4": {
"v11": 1,
"v12": 0,
"v13": 1,
},
"qwen3_decode_layer_incore_14": {
"qwen3_decode_incore_5": {
"v4": 1,
"v5": 1,
"v6": 1,
"v7": 0,
},
"qwen3_decode_incore_6": {
"v5": 1,
"v6": 1,
"v7": 0,
},
"qwen3_decode_incore_7": {
"v4": 1,
"v5": 1,
"v6": 1,
"v7": 0,
},
"qwen3_decode_incore_8": {
"v5": 2,
"v6": 1,
},
"qwen3_decode_incore_9": {
"v4": 1,
"v5": 64,
},
"qwen3_decode_incore_10": {
"v4": 1,
"v5": 64,
},
"qwen3_decode_incore_12": {
"v4": 256,
},
"qwen3_decode_incore_13": {
"v4": 256,
},
"qwen3_decode_incore_15": {
"v4": 128,
},
"qwen3_decode_incore_16": {
"v4": 1,
"v5": 128,
},
Comment on lines +89 to +135
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The dictionary CASE_INT_SCALAR_DEFAULTS is growing large. Consider moving these configuration values to a separate JSON or YAML file to improve maintainability and keep the script clean.

}

CASE_POINTER_COUNT_MINIMUMS = {
"qwen3_decode_layer_incore_13": {
"v2": 20480,
"v4": 131046528,
"v5": 131046528,
},
"qwen3_decode_layer_incore_14": {
"v1": 16384,
"v3": 651264,
},
}


Expand Down Expand Up @@ -878,6 +909,11 @@ def _copy_asset_if_needed(src: Path, dst: Path):
shutil.copy2(src, dst)


def _copy_custom_golden_helpers(sample_root: Path, output_dir: Path):
    """Copy custom golden helper scripts into a testcase output directory.

    Every entry directly inside ``sample_root`` whose name matches
    ``*_golden_*.py`` is passed to ``_copy_asset_if_needed`` with a
    destination of the same file name under ``output_dir``.

    Args:
        sample_root: Directory searched (non-recursively) for helper scripts.
        output_dir: Directory that receives the copies.
    """
    helper_paths = sample_root.glob("*_golden_*.py")
    for helper_path in helper_paths:
        # Keep the original file name so imports by module name keep working
        # next to the generated golden script (presumably; confirm with callers).
        destination = output_dir / helper_path.name
        _copy_asset_if_needed(helper_path, destination)


def _replace_includes(text: str) -> str:
if "#include \"common/pto_instr.hpp\"" in text:
return text.replace("#include \"common/pto_instr.hpp\"", INCLUDE_REPLACEMENT.rstrip())
Expand Down Expand Up @@ -1937,8 +1973,10 @@ def generate_testcase(
else:
golden_py = golden_template.replace("@INPUT_GENERATE@", "\n".join(input_generate))
golden_dst.write_text(golden_py, encoding="utf-8")
if (custom_golden is not None or custom_compare is not None) and shared_validation_runtime.is_file():
_copy_asset_if_needed(shared_validation_runtime, output_dir / "validation_runtime.py")
if custom_golden is not None or custom_compare is not None:
_copy_custom_golden_helpers(sample_root, output_dir)
if shared_validation_runtime.is_file():
_copy_asset_if_needed(shared_validation_runtime, output_dir / "validation_runtime.py")

# Emit the kernel source, optionally injecting a packed-predicate preload to
# make TCMP/TCMPS outputs deterministic for byte-wise compares.
Expand Down
11 changes: 11 additions & 0 deletions test/samples/Qwen3DecodeA3/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Qwen3 decode PTO kernels for A3, generated from `pypto-lib/examples/models/qwen3/qwen3_32b_decode.py`.

Scope:
- compile-regression inputs for `ptoas`
- board-validation inputs with per-case custom golden

Notes:
- This directory vendors the 17 emitted `qwen3_decode_incore_*.pto` fragments for the A3 lowering.
- `runop.sh` defaults these cases to `--pto-level=level3`.
- `runop.sh` skips this directory on A5 / Ascend950 targets.
- Each case has a sibling `<case>_golden.py`; shared reference logic lives in `qwen3_decode_golden_lib.py`.
Loading
Loading