test: add qwen3 decode A3/A5 PTO cases #1452
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: license-header lint + LLVM/MLIR + PTOAS build on every
# push/PR, plus nightly and on-demand remote NPU board validation.
name: CI

on:
  push:
  pull_request:
  # Nightly remote-board validation (GitHub cron is UTC).
  # 22:00 CST (UTC+8) == 14:00 UTC.
  schedule:
    - cron: "0 14 * * *"
  workflow_dispatch:
    inputs:
      stage:
        description: "Validation stage (build|run)"
        type: choice
        options: [build, run]
        default: run
      run_mode:
        description: "Run mode passed to generated CMake (sim|npu)"
        type: choice
        options: [npu, sim]
        default: npu
      soc_version:
        description: "Ascend SoC version (e.g. Ascend910B1)"
        type: string
        default: Ascend910
      device_id:
        description: "aclrtSetDevice device id"
        type: string
        # NOTE: On our shared remote NPU host, device 0/1 may be unstable or
        # occupied. Default to a higher id to reduce flakiness for scheduled
        # runs. Override in workflow_dispatch if needed.
        default: "2"
      skip_cases:
        description: "Comma/space separated testcase names to skip (e.g. scatter,mrgsort)"
        type: string
        default: "mix_kernel,vadd_validshape,vadd_validshape_dynamic,print,storefp"
      run_only_cases:
        description: "Comma/space separated testcase names to run (empty = run all)"
        type: string
        default: ""
      pto_isa_repo:
        description: "pto-isa repo URL on remote"
        type: string
        default: https://gitcode.com/cann/pto-isa.git
      pto_isa_commit:
        description: "pto-isa ref (commit/tag/branch; empty = repo-pinned weekly commit)"
        type: string
        # NOTE: Pin a known-good GitCode commit for deterministic runs.
        # Quoted so a digit-only-looking ref can never be re-typed by YAML.
        default: "662d7f2a916d6bbde3109ce4a16ed5c28f5d900a"
      remote_host:
        description: "SSH host/IP for the NPU machine"
        type: string
        default: "101.245.68.6"
      remote_user:
        description: "SSH user for the NPU machine"
        type: string
        default: zhongxuan
      remote_port:
        description: "SSH port"
        type: string
        default: "22"

permissions:
  contents: read
  actions: write
  pull-requests: read
jobs:
  # Lints license headers on code-changing events only; scheduled/dispatch
  # runs are board-validation runs and skip this job.
  license-header-check:
    if: ${{ github.event_name == 'pull_request' || github.event_name == 'push' }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Check out the PR head (fork-aware) when available, else the
          # pushed commit.
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
          ref: ${{ github.event.pull_request.head.sha || github.sha }}
          # Full history: the checker diffs against base/before SHAs.
          fetch-depth: 0
          persist-credentials: false
      - name: Check PR386 license headers
        env:
          GITHUB_TOKEN: ${{ github.token }}
        run: |
          python3 .github/scripts/check_license_headers.py \
            --repo "${{ github.repository }}" \
            --event-name "${{ github.event_name }}" \
            --pr-number "${{ github.event.pull_request.number || '' }}" \
            --base-sha "${{ github.event.pull_request.base.sha || github.event.before || '' }}" \
            --head-sha "${{ github.event.pull_request.head.sha || github.sha }}" \
            --github-token "${GITHUB_TOKEN}"
| build-and-test: | |
| runs-on: ubuntu-22.04 | |
| env: | |
| LLVM_COMMIT: cd708029e0b2869e80abe31ddb175f7c35361f90 | |
| LLVM_DIR: ${{ github.workspace }}/llvm-project/llvm/build-shared | |
| PTO_INSTALL_DIR: ${{ github.workspace }}/install | |
| MLIR_PYTHONPATH: ${{ github.workspace }}/llvm-project/llvm/build-shared/tools/mlir/python_packages/mlir_core | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} | |
| ref: ${{ github.event.pull_request.head.sha || github.sha }} | |
| fetch-depth: 1 | |
| persist-credentials: false | |
| - name: Install dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y \ | |
| cmake git ninja-build \ | |
| python3 python3-pip python3-venv \ | |
| clang lld \ | |
| libedit-dev zlib1g-dev libxml2-dev libzstd-dev | |
| python3 -m pip install --upgrade pip | |
| # LLVM/MLIR Python bindings are not yet compatible with pybind11 3.x. | |
| python3 -m pip install 'pybind11<3' numpy | |
| - name: Define payload paths | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| # NOTE: Some GitHub/GHES versions don't allow using `${{ runner.temp }}` in | |
| # `jobs.<job>.env`. Use the runtime env var instead. | |
| tmp_root="${RUNNER_TEMP:-${GITHUB_WORKSPACE}/.tmp}" | |
| echo "PAYLOAD_DIR=${tmp_root}/ptoas_payload" >> "${GITHUB_ENV}" | |
| echo "PAYLOAD_TGZ=${tmp_root}/ptoas_payload.tgz" >> "${GITHUB_ENV}" | |
| - name: Prepare payload dir | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| rm -rf "${PAYLOAD_DIR}" "${PAYLOAD_TGZ}" | |
| mkdir -p "${PAYLOAD_DIR}/test/samples" | |
| mkdir -p "${PAYLOAD_DIR}/test/npu_validation/scripts" | |
| mkdir -p "${PAYLOAD_DIR}/test/npu_validation/templates" | |
| # 先恢复 LLVM build 缓存 | |
| - name: Restore LLVM build cache | |
| id: cache-llvm | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: | | |
| llvm-project/llvm/build-shared | |
| key: llvm-${{ runner.os }}-${{ env.LLVM_COMMIT }}-shared-mlirpy | |
| - name: Prepare LLVM source (no rebuild) | |
| run: | | |
| mkdir -p llvm-project | |
| cd llvm-project | |
| # 如果 restore 只带来了 build-shared,这里补一个 git repo | |
| if [ ! -d .git ]; then | |
| git init | |
| git remote add origin https://github.com/llvm/llvm-project.git | |
| fi | |
| git fetch --depth 1 origin tag llvmorg-19.1.7 | |
| git checkout "${LLVM_COMMIT}" | |
| - name: Build LLVM/MLIR (only if cache miss) | |
| if: steps.cache-llvm.outputs.cache-hit != 'true' | |
| run: | | |
| cd llvm-project | |
| cmake -G Ninja -S llvm -B llvm/build-shared \ | |
| -DLLVM_ENABLE_PROJECTS="mlir;clang" \ | |
| -DBUILD_SHARED_LIBS=ON \ | |
| -DMLIR_ENABLE_BINDINGS_PYTHON=ON \ | |
| -DPython3_EXECUTABLE=python3 \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DLLVM_TARGETS_TO_BUILD="host" | |
| ninja -C llvm/build-shared | |
| # LLVM build 完成后立即保存缓存,避免后续测试影响缓存内容 | |
| - name: Save LLVM build cache | |
| if: steps.cache-llvm.outputs.cache-hit != 'true' | |
| uses: actions/cache/save@v4 | |
| with: | |
| path: | | |
| llvm-project/llvm/build-shared | |
| key: llvm-${{ runner.os }}-${{ env.LLVM_COMMIT }}-shared-mlirpy | |
| - name: Build PTOAS | |
| run: | | |
| export PYBIND11_CMAKE_DIR="$(python3 -m pybind11 --cmakedir)" | |
| cmake -G Ninja -S . -B build \ | |
| -DLLVM_DIR="${LLVM_DIR}/lib/cmake/llvm" \ | |
| -DMLIR_DIR="${LLVM_DIR}/lib/cmake/mlir" \ | |
| -DPython3_EXECUTABLE=python3 \ | |
| -DPython3_FIND_STRATEGY=LOCATION \ | |
| -Dpybind11_DIR="${PYBIND11_CMAKE_DIR}" \ | |
| -DMLIR_ENABLE_BINDINGS_PYTHON=ON \ | |
| -DMLIR_PYTHON_PACKAGE_DIR="${LLVM_DIR}/tools/mlir/python_packages/mlir_core" \ | |
| -DCMAKE_INSTALL_PREFIX="${PTO_INSTALL_DIR}" \ | |
| -DCMAKE_BUILD_TYPE=Release | |
| ninja -C build ptoas | |
| ninja -C build ptobc | |
| ninja -C build install | |
| - name: Run sample tests (py -> pto -> cpp) | |
| shell: bash | |
| env: | |
| CI_EVENT_NAME: ${{ github.event_name }} | |
| WORKFLOW_SOC_VERSION: ${{ github.event.inputs.soc_version || 'Ascend910' }} | |
| PTOAS_BIN: ${{ github.workspace }}/build/tools/ptoas/ptoas | |
| PYTHON_BIN: /usr/bin/python3 | |
| MLIR_PYTHON_ROOT: ${{ env.MLIR_PYTHONPATH }} | |
| PTO_PYTHON_ROOT: ${{ env.PTO_INSTALL_DIR }}/ | |
| run: | | |
| set -euo pipefail | |
| export PYTHONPATH="${MLIR_PYTHON_ROOT}:${PTO_PYTHON_ROOT}:${PYTHONPATH:-}" | |
| export LD_LIBRARY_PATH="${LLVM_DIR}/lib:${PTO_INSTALL_DIR}/lib:${LD_LIBRARY_PATH:-}" | |
| export PTOAS_OUT_DIR="${PAYLOAD_DIR}/test/samples" | |
| if [[ "${CI_EVENT_NAME}" == "workflow_dispatch" || "${CI_EVENT_NAME}" == "schedule" ]]; then | |
| # Board-validation payloads must only contain the arch-matching | |
| # direct .pto samples. Some A3/A5 qwen decode cases intentionally | |
| # share the same testcase basename, so SKIP_CASES cannot | |
| # distinguish them later once the payload is built. | |
| SOC_VERSION="${WORKFLOW_SOC_VERSION}" bash test/samples/runop.sh --enablebc all | |
| else | |
| bash test/samples/runop.sh --enablebc all | |
| fi | |
| - name: Build payload artifact | |
| if: >- | |
| ${{ | |
| github.event_name == 'workflow_dispatch' || | |
| github.event_name == 'schedule' | |
| }} | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| cp test/npu_validation/scripts/generate_testcase.py "${PAYLOAD_DIR}/test/npu_validation/scripts/" | |
| cp test/npu_validation/scripts/run_remote_npu_validation.sh "${PAYLOAD_DIR}/test/npu_validation/scripts/" | |
| cp test/npu_validation/templates/* "${PAYLOAD_DIR}/test/npu_validation/templates/" | |
| chmod +x "${PAYLOAD_DIR}/test/npu_validation/scripts/run_remote_npu_validation.sh" | |
| tar -czf "${PAYLOAD_TGZ}" -C "${PAYLOAD_DIR}" . | |
| - name: Upload payload artifact | |
| if: >- | |
| ${{ | |
| github.event_name == 'workflow_dispatch' || | |
| github.event_name == 'schedule' | |
| }} | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ptoas_payload | |
| path: ${{ env.PAYLOAD_TGZ }} | |
| if-no-files-found: error | |
| remote-npu-validation: | |
| needs: build-and-test | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 180 | |
| concurrency: | |
| group: remote-npu-validation | |
| cancel-in-progress: false | |
| # Ordering: `needs: build-and-test` enforces "CI -> remote". | |
| if: >- | |
| ${{ | |
| (github.event_name == 'workflow_dispatch' || | |
| github.event_name == 'schedule') | |
| }} | |
| env: | |
| PAYLOAD_DOWNLOAD_DIR: ${{ github.workspace }}/_payload | |
| PAYLOAD_TGZ: ${{ github.workspace }}/_payload/ptoas_payload.tgz | |
| # Temporary CI gate: skip cases that still error/flap on the remote NPU. | |
| # Update this list as we fix the underlying issues. | |
| DEFAULT_SKIP_CASES: >- | |
| mix_kernel,vadd_validshape,vadd_validshape_dynamic,print,storefp,Gemvmx | |
| steps: | |
| - name: Resolve validation parameters | |
| shell: bash | |
| env: | |
| STAGE: ${{ github.event.inputs.stage || 'run' }} | |
| RUN_MODE: ${{ github.event.inputs.run_mode || 'npu' }} | |
| SOC_VERSION: ${{ github.event.inputs.soc_version || 'Ascend910' }} | |
| DEVICE_ID: ${{ github.event.inputs.device_id || '2' }} | |
| SKIP_CASES: ${{ github.event.inputs.skip_cases || '' }} | |
| RUN_ONLY_CASES: ${{ github.event.inputs.run_only_cases || '' }} | |
| PTO_ISA_REPO: ${{ github.event.inputs.pto_isa_repo || 'https://gitcode.com/cann/pto-isa.git' }} | |
| PTO_ISA_COMMIT: ${{ github.event.inputs.pto_isa_commit || '662d7f2a916d6bbde3109ce4a16ed5c28f5d900a' }} | |
| REMOTE_HOST: ${{ github.event.inputs.remote_host || '101.245.68.6' }} | |
| REMOTE_USER: ${{ github.event.inputs.remote_user || 'zhongxuan' }} | |
| REMOTE_PORT: ${{ github.event.inputs.remote_port || '22' }} | |
| run: | | |
| set -euo pipefail | |
| # For scheduled runs, default to DEFAULT_SKIP_CASES (known-bad/flaky). | |
| # For workflow_dispatch runs, honor the user's input (the UI default | |
| # is pre-filled but can be edited to run everything). | |
| if [[ "${GITHUB_EVENT_NAME}" != "workflow_dispatch" ]]; then | |
| if [[ -z "${SKIP_CASES}" ]]; then | |
| SKIP_CASES="${DEFAULT_SKIP_CASES}" | |
| fi | |
| fi | |
| # Some validation samples are arch-specific due to stricter pto-isa | |
| # static checks and A5-only tile layouts. Always skip the | |
| # non-matching variant based on SOC_VERSION, even for explicit | |
| # RUN_ONLY_CASES requests, so remote validation does not try to force | |
| # A5-only cases through an A3 flow or vice versa. | |
| A3_ONLY_CASES="partition5d,partition5d_dynamic,mrgsort,tmatmulk_autosync" | |
| QWEN3_TILELET_A5_ONLY_CASES="$(printf 'qwen3_decode_layer_incore_%s,' {0..19})" | |
| QWEN3_TILELET_A5_ONLY_CASES="${QWEN3_TILELET_A5_ONLY_CASES%,}" | |
| A5_ONLY_CASES="partition5d_a5,partition5d_dynamic_a5,mrgsort_a5,tmatmulk_autosync_a5,tpack,${QWEN3_TILELET_A5_ONLY_CASES}" | |
| sv_lc="$(printf '%s' "${SOC_VERSION}" | tr '[:upper:]' '[:lower:]')" | |
| is_a5=0 | |
| if [[ "${sv_lc}" == *"950"* || "${sv_lc}" == *"a5"* ]]; then | |
| is_a5=1 | |
| fi | |
| if [[ ${is_a5} -eq 1 ]]; then | |
| SKIP_CASES="${SKIP_CASES:+${SKIP_CASES},}${A3_ONLY_CASES}" | |
| else | |
| SKIP_CASES="${SKIP_CASES:+${SKIP_CASES},}${A5_ONLY_CASES}" | |
| fi | |
| echo "STAGE=${STAGE}" >> "${GITHUB_ENV}" | |
| echo "RUN_MODE=${RUN_MODE}" >> "${GITHUB_ENV}" | |
| echo "SOC_VERSION=${SOC_VERSION}" >> "${GITHUB_ENV}" | |
| echo "DEVICE_ID=${DEVICE_ID}" >> "${GITHUB_ENV}" | |
| echo "SKIP_CASES=${SKIP_CASES}" >> "${GITHUB_ENV}" | |
| echo "RUN_ONLY_CASES=${RUN_ONLY_CASES}" >> "${GITHUB_ENV}" | |
| echo "PTO_ISA_REPO=${PTO_ISA_REPO}" >> "${GITHUB_ENV}" | |
| echo "PTO_ISA_COMMIT=${PTO_ISA_COMMIT}" >> "${GITHUB_ENV}" | |
| echo "REMOTE_HOST=${REMOTE_HOST}" >> "${GITHUB_ENV}" | |
| echo "REMOTE_USER=${REMOTE_USER}" >> "${GITHUB_ENV}" | |
| echo "REMOTE_PORT=${REMOTE_PORT}" >> "${GITHUB_ENV}" | |
| - name: Setup SSH | |
| shell: bash | |
| env: | |
| SSH_KEY: ${{ secrets.SSH_KEY }} | |
| SSH_KNOWN_HOSTS: ${{ secrets.SSH_KNOWN_HOSTS }} | |
| run: | | |
| set -euo pipefail | |
| if [[ -z "${SSH_KEY}" ]]; then | |
| echo "ERROR: secrets.SSH_KEY is not set" | |
| exit 1 | |
| fi | |
| if [[ -z "${SSH_KNOWN_HOSTS}" ]]; then | |
| echo "ERROR: secrets.SSH_KNOWN_HOSTS is not set" | |
| exit 1 | |
| fi | |
| mkdir -p ~/.ssh | |
| chmod 700 ~/.ssh | |
| printf '%s\n' "${SSH_KEY}" > ~/.ssh/id_ed25519 | |
| chmod 600 ~/.ssh/id_ed25519 | |
| printf '%s\n' "${SSH_KNOWN_HOSTS}" > ~/.ssh/known_hosts | |
| chmod 644 ~/.ssh/known_hosts | |
| - name: Download payload artifact | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ptoas_payload | |
| path: ${{ env.PAYLOAD_DOWNLOAD_DIR }} | |
| - name: Vendor pto-isa into payload (offline remote) | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| tmp_root="${RUNNER_TEMP:-${GITHUB_WORKSPACE}/.tmp}" | |
| work="$(mktemp -d "${tmp_root}/ptoas_payload_unpack.XXXXXX")" | |
| tar -xzf "${PAYLOAD_TGZ}" -C "${work}" | |
| rm -rf "${work}/pto-isa" | |
| git clone "${PTO_ISA_REPO}" "${work}/pto-isa" | |
| if [[ -n "${PTO_ISA_COMMIT}" ]]; then | |
| git -C "${work}/pto-isa" checkout -f "${PTO_ISA_COMMIT}" | |
| else | |
| git -C "${work}/pto-isa" checkout -f origin/HEAD || true | |
| fi | |
| # Ship a working tree only; remote should not need outbound network. | |
| rm -rf "${work}/pto-isa/.git" | |
| tar -czf "${PAYLOAD_TGZ}" -C "${work}" . | |
| - name: Copy payload to remote | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| if [[ ! -f "${PAYLOAD_TGZ}" ]]; then | |
| echo "ERROR: payload tarball not found: ${PAYLOAD_TGZ}" | |
| exit 1 | |
| fi | |
| REMOTE_DIR="/tmp/ptoas_npu_validation/${GITHUB_REPOSITORY}/${GITHUB_RUN_ID}" | |
| echo "REMOTE_DIR=${REMOTE_DIR}" >> "${GITHUB_ENV}" | |
| ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" "rm -rf '${REMOTE_DIR}' && mkdir -p '${REMOTE_DIR}'" | |
| scp -P "${REMOTE_PORT}" "${PAYLOAD_TGZ}" "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/payload.tgz" | |
| - name: Run remote validation | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" \ | |
| "set -euo pipefail; \ | |
| cd '${REMOTE_DIR}'; \ | |
| tar -xzf payload.tgz; \ | |
| STAGE='${STAGE}' RUN_MODE='${RUN_MODE}' SOC_VERSION='${SOC_VERSION}' PTO_ISA_REPO='${PTO_ISA_REPO}' PTO_ISA_COMMIT='${PTO_ISA_COMMIT}' DEVICE_ID='${DEVICE_ID}' SKIP_CASES='${SKIP_CASES}' RUN_ONLY_CASES='${RUN_ONLY_CASES}' \ | |
| bash ./test/npu_validation/scripts/run_remote_npu_validation.sh" | |
| - name: Fetch results | |
| if: always() | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| if [[ -z "${REMOTE_DIR:-}" ]]; then | |
| echo "REMOTE_DIR is not set; skipping results fetch." | |
| exit 0 | |
| fi | |
| scp -P "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/remote_npu_validation_results.tsv" ./remote_npu_validation_results.tsv || true | |
| - name: Upload results artifact | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: remote_npu_validation_results | |
| path: remote_npu_validation_results.tsv | |
| if-no-files-found: warn | |
| - name: Cleanup remote | |
| if: always() | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| if [[ -n "${REMOTE_DIR:-}" ]]; then | |
| ssh -p "${REMOTE_PORT}" "${REMOTE_USER}@${REMOTE_HOST}" "rm -rf '${REMOTE_DIR}'" || true | |
| fi |