# Workflow file for run: "test: add qwen3 decode A3/A5 PTO cases" (#1450)
name: CI

on:
  push:
  pull_request:
  # Nightly remote-board validation (GitHub cron is UTC).
  # 22:00 CST (UTC+8) == 14:00 UTC.
  schedule:
    - cron: "0 14 * * *"
  workflow_dispatch:
    inputs:
      stage:
        description: "Validation stage (build|run)"
        type: choice
        options: [build, run]
        default: run
      run_mode:
        description: "Run mode passed to generated CMake (sim|npu)"
        type: choice
        options: [npu, sim]
        default: npu
      soc_version:
        description: "Ascend SoC version (e.g. Ascend910B1)"
        type: string
        default: Ascend910
      device_id:
        description: "aclrtSetDevice device id"
        type: string
        # NOTE: On our shared remote NPU host, device 0/1 may be unstable or
        # occupied. Default to a higher id to reduce flakiness for scheduled
        # runs. Override in workflow_dispatch if needed.
        default: "2"
      skip_cases:
        description: "Comma/space separated testcase names to skip (e.g. scatter,mrgsort)"
        type: string
        default: "mix_kernel,vadd_validshape,vadd_validshape_dynamic,print,storefp"
      run_only_cases:
        description: "Comma/space separated testcase names to run (empty = run all)"
        type: string
        default: ""
      pto_isa_repo:
        description: "pto-isa repo URL on remote"
        type: string
        default: "https://gitcode.com/cann/pto-isa.git"
      pto_isa_commit:
        description: "pto-isa ref (commit/tag/branch; empty = repo-pinned weekly commit)"
        type: string
        # NOTE: Pin a known-good GitCode commit for deterministic runs.
        # Quoted so an all-digit SHA can never be read as a number.
        default: "662d7f2a916d6bbde3109ce4a16ed5c28f5d900a"
      remote_host:
        description: "SSH host/IP for the NPU machine"
        type: string
        default: "101.245.68.6"
      remote_user:
        description: "SSH user for the NPU machine"
        type: string
        default: zhongxuan
      remote_port:
        description: "SSH port"
        type: string
        default: "22"

# Least-privilege token: `actions: write` is needed for artifact handling
# between the build and remote-validation jobs.
permissions:
  contents: read
  actions: write
  pull-requests: read
jobs:
  # Fast lint gate for pushes/PRs: verifies license headers on changed files.
  license-header-check:
    if: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # For PRs, check out the head repo/sha so fork branches are checked.
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
          ref: ${{ github.event.pull_request.head.sha || github.sha }}
          # Full history: the checker diffs base..head to find changed files.
          fetch-depth: 0
          persist-credentials: false
      - name: Check PR386 license headers
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          python3 .github/scripts/check_license_headers.py \
            --repo "${{ github.repository }}" \
            --event-name "${{ github.event_name }}" \
            --pr-number "${{ github.event.pull_request.number || '' }}" \
            --base-sha "${{ github.event.pull_request.base.sha || github.event.before || '' }}" \
            --head-sha "${{ github.event.pull_request.head.sha || github.sha }}" \
            --github-token "${GITHUB_TOKEN}"
build-and-test:
runs-on: ubuntu-22.04
env:
LLVM_COMMIT: cd708029e0b2869e80abe31ddb175f7c35361f90
LLVM_DIR: ${{ github.workspace }}/llvm-project/llvm/build-shared
PTO_INSTALL_DIR: ${{ github.workspace }}/install
MLIR_PYTHONPATH: ${{ github.workspace }}/llvm-project/llvm/build-shared/tools/mlir/python_packages/mlir_core
steps:
- name: Checkout
uses: actions/checkout@v4
with:
repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
ref: ${{ github.event.pull_request.head.sha || github.sha }}
fetch-depth: 1
persist-credentials: false
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
cmake git ninja-build \
python3 python3-pip python3-venv \
clang lld \
libedit-dev zlib1g-dev libxml2-dev libzstd-dev
python3 -m pip install --upgrade pip
# LLVM/MLIR Python bindings are not yet compatible with pybind11 3.x.
python3 -m pip install 'pybind11<3' numpy
- name: Define payload paths
shell: bash
run: |
set -euo pipefail
# NOTE: Some GitHub/GHES versions don't allow using `${{ runner.temp }}` in
# `jobs.<job>.env`. Use the runtime env var instead.
tmp_root="${RUNNER_TEMP:-${GITHUB_WORKSPACE}/.tmp}"
echo "PAYLOAD_DIR=${tmp_root}/ptoas_payload" >> "${GITHUB_ENV}"
echo "PAYLOAD_TGZ=${tmp_root}/ptoas_payload.tgz" >> "${GITHUB_ENV}"
- name: Prepare payload dir
shell: bash
run: |
set -euo pipefail
rm -rf "${PAYLOAD_DIR}" "${PAYLOAD_TGZ}"
mkdir -p "${PAYLOAD_DIR}/test/samples"
mkdir -p "${PAYLOAD_DIR}/test/npu_validation/scripts"
mkdir -p "${PAYLOAD_DIR}/test/npu_validation/templates"
# 先恢复 LLVM build 缓存
- name: Restore LLVM build cache
id: cache-llvm
uses: actions/cache/restore@v4
with:
path: |
llvm-project/llvm/build-shared
key: llvm-${{ runner.os }}-${{ env.LLVM_COMMIT }}-shared-mlirpy
- name: Prepare LLVM source (no rebuild)
run: |
mkdir -p llvm-project
cd llvm-project
# 如果 restore 只带来了 build-shared,这里补一个 git repo
if [ ! -d .git ]; then
git init
git remote add origin https://github.com/llvm/llvm-project.git
fi
git fetch --depth 1 origin tag llvmorg-19.1.7
git checkout "${LLVM_COMMIT}"
- name: Build LLVM/MLIR (only if cache miss)
if: steps.cache-llvm.outputs.cache-hit != 'true'
run: |
cd llvm-project
cmake -G Ninja -S llvm -B llvm/build-shared \
-DLLVM_ENABLE_PROJECTS="mlir;clang" \
-DBUILD_SHARED_LIBS=ON \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE=python3 \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_TARGETS_TO_BUILD="host"
ninja -C llvm/build-shared
# LLVM build 完成后立即保存缓存,避免后续测试影响缓存内容
- name: Save LLVM build cache
if: steps.cache-llvm.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: |
llvm-project/llvm/build-shared
key: llvm-${{ runner.os }}-${{ env.LLVM_COMMIT }}-shared-mlirpy
- name: Build PTOAS
run: |
export PYBIND11_CMAKE_DIR="$(python3 -m pybind11 --cmakedir)"
cmake -G Ninja -S . -B build \
-DLLVM_DIR="${LLVM_DIR}/lib/cmake/llvm" \
-DMLIR_DIR="${LLVM_DIR}/lib/cmake/mlir" \
-DPython3_EXECUTABLE=python3 \
-DPython3_FIND_STRATEGY=LOCATION \
-Dpybind11_DIR="${PYBIND11_CMAKE_DIR}" \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DMLIR_PYTHON_PACKAGE_DIR="${LLVM_DIR}/tools/mlir/python_packages/mlir_core" \
-DCMAKE_INSTALL_PREFIX="${PTO_INSTALL_DIR}" \
-DCMAKE_BUILD_TYPE=Release
ninja -C build ptoas
ninja -C build ptobc
ninja -C build install
- name: Run sample tests (py -> pto -> cpp)
shell: bash
env:
PTOAS_BIN: ${{ github.workspace }}/build/tools/ptoas/ptoas
PYTHON_BIN: /usr/bin/python3
MLIR_PYTHON_ROOT: ${{ env.MLIR_PYTHONPATH }}
PTO_PYTHON_ROOT: ${{ env.PTO_INSTALL_DIR }}/
run: |
set -euo pipefail
export PYTHONPATH="${MLIR_PYTHON_ROOT}:${PTO_PYTHON_ROOT}:${PYTHONPATH:-}"
export LD_LIBRARY_PATH="${LLVM_DIR}/lib:${PTO_INSTALL_DIR}/lib:${LD_LIBRARY_PATH:-}"
export PTOAS_OUT_DIR="${PAYLOAD_DIR}/test/samples"
bash test/samples/runop.sh --enablebc all
- name: Build payload artifact
if: >-
${{
github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule'
}}
shell: bash
run: |
set -euo pipefail
cp test/npu_validation/scripts/generate_testcase.py "${PAYLOAD_DIR}/test/npu_validation/scripts/"
cp test/npu_validation/scripts/run_remote_npu_validation.sh "${PAYLOAD_DIR}/test/npu_validation/scripts/"
cp test/npu_validation/templates/* "${PAYLOAD_DIR}/test/npu_validation/templates/"
chmod +x "${PAYLOAD_DIR}/test/npu_validation/scripts/run_remote_npu_validation.sh"
tar -czf "${PAYLOAD_TGZ}" -C "${PAYLOAD_DIR}" .
- name: Upload payload artifact
if: >-
${{
github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule'
}}
uses: actions/upload-artifact@v4
with:
name: ptoas_payload
path: ${{ env.PAYLOAD_TGZ }}
if-no-files-found: error
remote-npu-validation:
needs: build-and-test
runs-on: ubuntu-22.04
timeout-minutes: 180
concurrency:
group: remote-npu-validation
cancel-in-progress: false
# Ordering: `needs: build-and-test` enforces "CI -> remote".
if: >-
${{
(github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule')
}}
env:
PAYLOAD_DOWNLOAD_DIR: ${{ github.workspace }}/_payload
PAYLOAD_TGZ: ${{ github.workspace }}/_payload/ptoas_payload.tgz
# Temporary CI gate: skip cases that still error/flap on the remote NPU.
# Update this list as we fix the underlying issues.
DEFAULT_SKIP_CASES: >-
mix_kernel,vadd_validshape,vadd_validshape_dynamic,print,storefp,Gemvmx
steps:
- name: Resolve validation parameters
shell: bash
env:
STAGE: ${{ github.event.inputs.stage || 'run' }}
RUN_MODE: ${{ github.event.inputs.run_mode || 'npu' }}
SOC_VERSION: ${{ github.event.inputs.soc_version || 'Ascend910' }}
DEVICE_ID: ${{ github.event.inputs.device_id || '2' }}
SKIP_CASES: ${{ github.event.inputs.skip_cases || '' }}
RUN_ONLY_CASES: ${{ github.event.inputs.run_only_cases || '' }}
PTO_ISA_REPO: ${{ github.event.inputs.pto_isa_repo || 'https://gitcode.com/cann/pto-isa.git' }}
PTO_ISA_COMMIT: ${{ github.event.inputs.pto_isa_commit || '662d7f2a916d6bbde3109ce4a16ed5c28f5d900a' }}
REMOTE_HOST: ${{ github.event.inputs.remote_host || '101.245.68.6' }}
REMOTE_USER: ${{ github.event.inputs.remote_user || 'zhongxuan' }}
REMOTE_PORT: ${{ github.event.inputs.remote_port || '22' }}
run: |
set -euo pipefail
# For scheduled runs, default to DEFAULT_SKIP_CASES (known-bad/flaky).
# For workflow_dispatch runs, honor the user's input (the UI default
# is pre-filled but can be edited to run everything).
if [[ "${GITHUB_EVENT_NAME}" != "workflow_dispatch" ]]; then
if [[ -z "${SKIP_CASES}" ]]; then
SKIP_CASES="${DEFAULT_SKIP_CASES}"
fi
fi
# Some validation samples are arch-specific due to stricter pto-isa
# static checks and A5-only tile layouts. Always skip the
# non-matching variant based on SOC_VERSION, even for explicit
# RUN_ONLY_CASES requests, so remote validation does not try to force
# A5-only cases through an A3 flow or vice versa.
A3_ONLY_CASES="partition5d,partition5d_dynamic,mrgsort,tmatmulk_autosync"
QWEN3_TILELET_A5_ONLY_CASES="$(printf 'qwen3_decode_layer_incore_%s,' {0..19})"
QWEN3_TILELET_A5_ONLY_CASES="${QWEN3_TILELET_A5_ONLY_CASES%,}"
A5_ONLY_CASES="partition5d_a5,partition5d_dynamic_a5,mrgsort_a5,tmatmulk_autosync_a5,tpack,${QWEN3_TILELET_A5_ONLY_CASES}"
sv_lc="$(printf '%s' "${SOC_VERSION}" | tr '[:upper:]' '[:lower:]')"
is_a5=0
if [[ "${sv_lc}" == *"950"* || "${sv_lc}" == *"a5"* ]]; then
is_a5=1
fi
if [[ ${is_a5} -eq 1 ]]; then
SKIP_CASES="${SKIP_CASES:+${SKIP_CASES},}${A3_ONLY_CASES}"
else
SKIP_CASES="${SKIP_CASES:+${SKIP_CASES},}${A5_ONLY_CASES}"
fi
echo "STAGE=${STAGE}" >> "${GITHUB_ENV}"
echo "RUN_MODE=${RUN_MODE}" >> "${GITHUB_ENV}"
echo "SOC_VERSION=${SOC_VERSION}" >> "${GITHUB_ENV}"
echo "DEVICE_ID=${DEVICE_ID}" >> "${GITHUB_ENV}"
echo "SKIP_CASES=${SKIP_CASES}" >> "${GITHUB_ENV}"
echo "RUN_ONLY_CASES=${RUN_ONLY_CASES}" >> "${GITHUB_ENV}"
echo "PTO_ISA_REPO=${PTO_ISA_REPO}" >> "${GITHUB_ENV}"
echo "PTO_ISA_COMMIT=${PTO_ISA_COMMIT}" >> "${GITHUB_ENV}"
echo "REMOTE_HOST=${REMOTE_HOST}" >> "${GITHUB_ENV}"
echo "REMOTE_USER=${REMOTE_USER}" >> "${GITHUB_ENV}"
echo "REMOTE_PORT=${REMOTE_PORT}" >> "${GITHUB_ENV}"
- name: Setup SSH
shell: bash
env:
SSH_KEY: ${{ secrets.SSH_KEY }}
SSH_KNOWN_HOSTS: ${{ secrets.SSH_KNOWN_HOSTS }}
run: |
set -euo pipefail
if [[ -z "${SSH_KEY}" ]]; then
echo "ERROR: secrets.SSH_KEY is not set"
exit 1
fi
if [[ -z "${SSH_KNOWN_HOSTS}" ]]; then
echo "ERROR: secrets.SSH_KNOWN_HOSTS is not set"
exit 1
fi
mkdir -p ~/.ssh
chmod 700 ~/.ssh
printf '%s\n' "${SSH_KEY}" > ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
printf '%s\n' "${SSH_KNOWN_HOSTS}" > ~/.ssh/known_hosts
chmod 644 ~/.ssh/known_hosts
- name: Download payload artifact
uses: actions/download-artifact@v4
with:
name: ptoas_payload
path: ${{ env.PAYLOAD_DOWNLOAD_DIR }}
- name: Vendor pto-isa into payload (offline remote)
shell: bash
run: |
set -euo pipefail
tmp_root="${RUNNER_TEMP:-${GITHUB_WORKSPACE}/.tmp}"
work="$(mktemp -d "${tmp_root}/ptoas_payload_unpack.XXXXXX")"
tar -xzf "${PAYLOAD_TGZ}" -C "${work}"
rm -rf "${work}/pto-isa"
git clone "${PTO_ISA_REPO}" "${work}/pto-isa"
if [[ -n "${PTO_ISA_COMMIT}" ]]; then
git -C "${work}/pto-isa" checkout -f "${PTO_ISA_COMMIT}"
else
git -C "${work}/pto-isa" checkout -f origin/HEAD || true
fi
# Ship a working tree only; remote should not need outbound network.
rm -rf "${work}/pto-isa/.git"
tar -czf "${PAYLOAD_TGZ}" -C "${work}" .
- name: Copy payload to remote
shell: bash
run: |
set -euo pipefail
if [[ ! -f "${PAYLOAD_TGZ}" ]]; then
echo "ERROR: payload tarball not found: ${PAYLOAD_TGZ}"
exit 1
fi
REMOTE_DIR="/tmp/ptoas_npu_validation/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}"
echo "REMOTE_DIR=${REMOTE_DIR}" >> "${GITHUB_ENV}"
ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" "rm -rf '${REMOTE_DIR}' && mkdir -p '${REMOTE_DIR}'"
scp -P "${REMOTE_PORT}" "${PAYLOAD_TGZ}" "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/payload.tgz"
- name: Run remote validation
shell: bash
run: |
set -euo pipefail
ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" \
"set -euo pipefail; \
cd '${REMOTE_DIR}'; \
tar -xzf payload.tgz; \
STAGE='${STAGE}' RUN_MODE='${RUN_MODE}' SOC_VERSION='${SOC_VERSION}' PTO_ISA_REPO='${PTO_ISA_REPO}' PTO_ISA_COMMIT='${PTO_ISA_COMMIT}' DEVICE_ID='${DEVICE_ID}' SKIP_CASES='${SKIP_CASES}' RUN_ONLY_CASES='${RUN_ONLY_CASES}' \
bash ./test/npu_validation/scripts/run_remote_npu_validation.sh"
- name: Fetch results
if: always()
shell: bash
run: |
set -euo pipefail
if [[ -z "${REMOTE_DIR:-}" ]]; then
echo "REMOTE_DIR is not set; skipping results fetch."
exit 0
fi
scp -P "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/remote_npu_validation_results.tsv" ./remote_npu_validation_results.tsv || true
- name: Upload results artifact
if: always()
uses: actions/upload-artifact@v4
with:
name: remote_npu_validation_results
path: remote_npu_validation_results.tsv
if-no-files-found: warn
- name: Cleanup remote
if: always()
shell: bash
run: |
set -euo pipefail
if [[ -n "${REMOTE_DIR:-}" ]]; then
ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" "rm -rf '${REMOTE_DIR}'" || true
fi