From aa78a1c8127bdbfc305804a52c39ef37de8706da Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Wed, 29 Apr 2026 14:58:41 +0000 Subject: [PATCH 1/6] [CI] Add source rebuild environment to Docker image Keep the existing torch/ROCm wheel installation flow, but prepare the Docker image for source rebuilds by cloning PyTorch with full history and submodules, installing ccache, and exporting the compiler/cache env vars needed by PyTorch builds. Derive PYTORCH_ROCM_ARCH from the selected AMDGPU family and pass it as a Docker build arg (e.g. gfx94X-dcgpu -> gfx942, gfx950-dcgpu -> gfx950). --- .../build_portable_linux_pytorch_dockers.yml | 24 +++++++++++++++++-- dockerfiles/Dockerfile | 19 ++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index d5c9a94c3b1ad..20a32b963edd7 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -93,7 +93,8 @@ jobs: repository: ${{ matrix.pytorch_repo }} ref: ${{ matrix.pytorch_branch }} path: pytorch-src - fetch-depth: 1 + fetch-depth: 0 + submodules: recursive - name: Derive torch version prefix from branch id: prefix @@ -164,6 +165,14 @@ jobs: echo "pytorch_repo=${{ matrix.pytorch_repo }}" >> $GITHUB_OUTPUT echo "pytorch_branch=${{ matrix.pytorch_branch }}" >> $GITHUB_OUTPUT + GFX="${{ env.DEFAULT_AMDGPU_FAMILY }}" + case "${GFX}" in + gfx94X-dcgpu) PYTORCH_ROCM_ARCH="gfx942" ;; + *-*) PYTORCH_ROCM_ARCH="${GFX%%-*}" ;; + *) PYTORCH_ROCM_ARCH="${GFX}" ;; + esac + echo "pytorch_rocm_arch=${PYTORCH_ROCM_ARCH}" >> $GITHUB_OUTPUT + COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT @@ -213,6 +222,7 @@ jobs: --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ --build-arg 
"TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ + --build-arg "PYTORCH_ROCM_ARCH=${{ steps.cfg.outputs.pytorch_rocm_arch }}" \ pytorch-src echo "Docker image built successfully: ${IMAGE}" @@ -264,7 +274,8 @@ jobs: repository: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} ref: ${{ inputs.pytorch_branch || 'nightly' }} path: pytorch-src - fetch-depth: 1 + fetch-depth: 0 + submodules: recursive - name: Derive torch version prefix from branch id: prefix @@ -340,6 +351,14 @@ jobs: echo "pytorch_repo=${{ inputs.pytorch_repo || 'pytorch/pytorch' }}" >> $GITHUB_OUTPUT echo "pytorch_branch=${{ inputs.pytorch_branch || 'nightly' }}" >> $GITHUB_OUTPUT + GFX="${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" + case "${GFX}" in + gfx94X-dcgpu) PYTORCH_ROCM_ARCH="gfx942" ;; + *-*) PYTORCH_ROCM_ARCH="${GFX%%-*}" ;; + *) PYTORCH_ROCM_ARCH="${GFX}" ;; + esac + echo "pytorch_rocm_arch=${PYTORCH_ROCM_ARCH}" >> $GITHUB_OUTPUT + COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT @@ -389,6 +408,7 @@ jobs: --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ --build-arg "TORCH_VERSION_PREFIX=${{ steps.cfg.outputs.torch_prefix }}" \ + --build-arg "PYTORCH_ROCM_ARCH=${{ steps.cfg.outputs.pytorch_rocm_arch }}" \ pytorch-src echo "Docker image built successfully: ${IMAGE}" diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index 361d0219eceef..97a51e10afc2c 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -26,6 +26,7 @@ # - AMDGPU_FAMILY : AMD GPU family (e.g., gfx94X-dcgpu, gfx90X-dcgpu, gfx950-dcgpu) # - PYTHON_VERSION : Python version for PyTorch (default: 3.12) # - INDEX_URL : (Required) Base URL for PyTorch wheels index +# - PYTORCH_ROCM_ARCH: GPU arch target for source builds / local rebuilds # - TORCH_VERSION : Optional specific PyTorch version. If not set, installs latest. 
# - TORCHAUDIO_VERSION : Optional specific torchaudio version. If not set, installs latest. # - TORCHVISION_VERSION: Optional specific torchvision version. If not set, installs latest. @@ -58,6 +59,7 @@ ARG RELEASE_TYPE=nightly # PyTorch configuration arguments ARG PYTHON_VERSION=3.12 ARG INDEX_URL +ARG PYTORCH_ROCM_ARCH ARG TORCH_VERSION ARG TORCH_VERSION_PREFIX ARG TORCHAUDIO_VERSION @@ -75,6 +77,12 @@ COPY . /workspace/pytorch RUN chmod +x /tmp/install_rocm_deps.sh && \ /tmp/install_rocm_deps.sh +# Install ccache for PyTorch source rebuilds in the image. +RUN if ! command -v ccache >/dev/null 2>&1; then \ + apt-get update; \ + apt-get install -y ccache; \ + fi + # Install the requested Python version if not already available. # Ubuntu 24.04 ships with 3.12; other versions come from deadsnakes PPA. RUN if ! command -v python${PYTHON_VERSION} >/dev/null 2>&1; then \ @@ -120,7 +128,6 @@ ENV ROCM_HOME="/opt/venv/lib/python${PYTHON_VERSION}/site-packages/_rocm_sdk_dev ROCM_SOURCE_DIR="/opt/venv/lib/python${PYTHON_VERSION}/site-packages/_rocm_sdk_devel" \ ROCM_BIN="/opt/venv/lib/python${PYTHON_VERSION}/site-packages/_rocm_sdk_devel/bin" \ ROCM_CMAKE="/opt/venv/lib/python${PYTHON_VERSION}/site-packages/_rocm_sdk_devel/lib/cmake" \ - PYTORCH_ROCM_ARCH="${AMDGPU_FAMILY}" \ VIRTUAL_ENV=/opt/venv \ USE_MSLK=0 @@ -133,8 +140,13 @@ ENV CMAKE_PREFIX_PATH="${ROCM_CMAKE}" \ PKG_CONFIG_PATH="${ROCM_HOME}/lib/rocm_sysdeps/lib/pkgconfig" \ LD_LIBRARY_PATH="${ROCM_HOME}/lib/host-math/lib:${ROCM_HOME}/lib/rocm_sysdeps/lib" \ LIBRARY_PATH="${ROCM_HOME}/lib/host-math/lib:${ROCM_HOME}/lib/rocm_sysdeps/lib" \ - CC="${ROCM_HOME}/lib/llvm/bin/clang" \ - CXX="${ROCM_HOME}/lib/llvm/bin/clang++" \ + CC=/usr/bin/gcc \ + CXX=/usr/bin/g++ \ + CMAKE_C_COMPILER_LAUNCHER=ccache \ + CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + CMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + CMAKE_HIP_COMPILER_LAUNCHER=ccache \ + PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH}" \ PATH="${ROCM_BIN}:${PATH}" # Verify PyTorch imports and 
environment @@ -145,6 +157,7 @@ print('ROCm/HIP', torch.version.hip) print(f'ROCM_HOME={os.environ.get("ROCM_HOME", "NOT SET")}') print(f'CC={os.environ.get("CC", "NOT SET")}') print(f'CXX={os.environ.get("CXX", "NOT SET")}') +print(f'PYTORCH_ROCM_ARCH={os.environ.get("PYTORCH_ROCM_ARCH", "NOT SET")}') for mod in ['torchaudio', 'torchvision', 'triton']: try: m = __import__(mod) From 20b7b328ac54f21c077e39d8aff1f821a45a40a4 Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Wed, 29 Apr 2026 15:06:47 +0000 Subject: [PATCH 2/6] [CI] Place PyTorch source under /tmp in Docker image Move the copied PyTorch checkout from /workspace/pytorch to /tmp/pytorch inside the Docker image while keeping the existing wheel installation flow. --- dockerfiles/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index 97a51e10afc2c..f97f89de464e6 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -32,7 +32,7 @@ # - TORCHVISION_VERSION: Optional specific torchvision version. If not set, installs latest. # - TRITON_VERSION : Optional specific triton version. If not set, uses torch's dependency. # -# Note: The PyTorch source is included at /workspace/pytorch (from the repo root). +# Note: The PyTorch source is included at /tmp/pytorch (from the repo root). # # Build example (run from repo root): # @@ -71,7 +71,7 @@ COPY .github/scripts/install_rocm_deps.sh /tmp/ COPY .github/scripts/install_pytorch_wheels.py /tmp/ # Copy PyTorch source from the repo root -COPY . /workspace/pytorch +COPY . 
/tmp/pytorch # Install system dependencies RUN chmod +x /tmp/install_rocm_deps.sh && \ @@ -169,4 +169,4 @@ PYEOF # Clean up installation scripts RUN rm -f /tmp/install_rocm_deps.sh /tmp/install_pytorch_wheels.py -WORKDIR /workspace/pytorch +WORKDIR /tmp/pytorch From 25d8dde95844bd7b4158a365d79731e32494e9ea Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Wed, 29 Apr 2026 20:07:27 +0000 Subject: [PATCH 3/6] [CI] Derive PyTorch ROCm arch from TheRock metadata Use TheRock's expand_amdgpu_families.py helper to derive the concrete PYTORCH_ROCM_ARCH build targets from the selected AMDGPU family instead of maintaining ad hoc shell mappings in the Docker workflow. The workflow checks out ROCm/TheRock shallowly and invokes the helper for both scheduled matrix builds and manual dispatch builds. --- .../build_portable_linux_pytorch_dockers.yml | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index 20a32b963edd7..d72d5e3de738e 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -96,6 +96,13 @@ jobs: fetch-depth: 0 submodules: recursive + - name: Checkout TheRock AMDGPU metadata + uses: actions/checkout@v4 + with: + repository: ROCm/TheRock + path: therock-src + fetch-depth: 1 + - name: Derive torch version prefix from branch id: prefix run: | @@ -166,11 +173,7 @@ jobs: echo "pytorch_branch=${{ matrix.pytorch_branch }}" >> $GITHUB_OUTPUT GFX="${{ env.DEFAULT_AMDGPU_FAMILY }}" - case "${GFX}" in - gfx94X-dcgpu) PYTORCH_ROCM_ARCH="gfx942" ;; - *-*) PYTORCH_ROCM_ARCH="${GFX%%-*}" ;; - *) PYTORCH_ROCM_ARCH="${GFX}" ;; - esac + PYTORCH_ROCM_ARCH="$(python3 therock-src/build_tools/github_actions/expand_amdgpu_families.py --amdgpu-families "${GFX}")" echo "pytorch_rocm_arch=${PYTORCH_ROCM_ARCH}" >> $GITHUB_OUTPUT COMMIT="$(cd pytorch-src && git 
rev-parse --short=8 HEAD)" @@ -277,6 +280,13 @@ jobs: fetch-depth: 0 submodules: recursive + - name: Checkout TheRock AMDGPU metadata + uses: actions/checkout@v4 + with: + repository: ROCm/TheRock + path: therock-src + fetch-depth: 1 + - name: Derive torch version prefix from branch id: prefix run: | @@ -352,11 +362,7 @@ jobs: echo "pytorch_branch=${{ inputs.pytorch_branch || 'nightly' }}" >> $GITHUB_OUTPUT GFX="${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" - case "${GFX}" in - gfx94X-dcgpu) PYTORCH_ROCM_ARCH="gfx942" ;; - *-*) PYTORCH_ROCM_ARCH="${GFX%%-*}" ;; - *) PYTORCH_ROCM_ARCH="${GFX}" ;; - esac + PYTORCH_ROCM_ARCH="$(python3 therock-src/build_tools/github_actions/expand_amdgpu_families.py --amdgpu-families "${GFX}")" echo "pytorch_rocm_arch=${PYTORCH_ROCM_ARCH}" >> $GITHUB_OUTPUT COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" From 6af5d83e871c736e508ef06ef5a3e16710155ac5 Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Tue, 5 May 2026 18:02:58 +0000 Subject: [PATCH 4/6] [CI] Scan portable Docker images for vulnerabilities Run the Trivy critical-vulnerability scan before pushing portable PyTorch Docker images so the TheRock image workflow matches the existing security gate. 
--- .../build_portable_linux_pytorch_dockers.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index d72d5e3de738e..4f60eaaaaa299 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -241,6 +241,17 @@ jobs: echo "ROCm packages:" echo "${ROCM_PACKAGES}" + - name: Scan image for vulnerabilities + uses: aquasecurity/trivy-action@v0.36.0 + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} + format: table + severity: CRITICAL + ignore-unfixed: true + exit-code: "1" + scanners: vuln + timeout: 30m + - name: Push Docker image run: | docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} @@ -430,6 +441,17 @@ jobs: echo "ROCm packages:" echo "${ROCM_PACKAGES}" + - name: Scan image for vulnerabilities + uses: aquasecurity/trivy-action@v0.36.0 + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} + format: table + severity: CRITICAL + ignore-unfixed: true + exit-code: "1" + scanners: vuln + timeout: 30m + - name: Push Docker image run: | docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} From 97ed6041c75f50aa6a18b940a6287e8495d6c641 Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Tue, 5 May 2026 18:31:42 +0000 Subject: [PATCH 5/6] [CI] Use Docker PAT secrets for image push Update the portable Docker workflow to use the standard DOCKER_USERNAME and DOCKER_PAT secrets for Docker Hub authentication. 
--- .../workflows/build_portable_linux_pytorch_dockers.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index 4f60eaaaaa299..f9c2d6a93970e 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -199,8 +199,8 @@ jobs: - name: Log in to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PAT }} - name: Prepare build context run: | @@ -399,8 +399,8 @@ jobs: - name: Log in to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PAT }} - name: Prepare build context run: | From 8c2fac5c8f0d99527a80b6ea57d5569ea3ab84ba Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Wed, 6 May 2026 17:10:52 +0000 Subject: [PATCH 6/6] [CI] Surface inline parity log timeouts Detect log-only timeout failures that appear as inline pytest KeyboardInterrupt retries, and always download logs for parity scanning even when raw log upload is disabled. 
--- .../detect_log_failures.py | 74 ++++++++++++++++++- .github/workflows/parity.yml | 11 ++- 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py index 0156624c35973..913f1ad16203e 100755 --- a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py +++ b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py @@ -29,6 +29,15 @@ ) RE_EXIT_CODE = re.compile(r"Got exit code (?P<code>\d+)") RE_TIMEOUT = re.compile(r"Command took >(\d+)min, returning 124") +RE_JOB_TIMEOUT = re.compile( + r"(?:" + r"The job running on runner .* has exceeded the maximum execution time|" + r"The action has timed out|" + r"operation was canceled|" + r"timed out after \d+" + r")", + re.IGNORECASE, +) RE_FAILED_CONSISTENTLY = re.compile( r"FAILED CONSISTENTLY: (?P\S+)" ) @@ -96,6 +105,9 @@ def parse_log_file(filepath): consistent_failures = [] flaky_tests = [] last_passed_individual = None + last_individual_test = None + pending_keyboard_interrupt = None + inline_failures = [] with open(filepath, "r", errors="replace") as f: for line in f: @@ -105,6 +117,11 @@ def parse_log_file(filepath): if ".py::" in line: m_ind = RE_INDIVIDUAL_TEST.search(line) if m_ind: + last_individual_test = { + "file": m_ind.group("test_path").split("::", 1)[0], + "cls": m_ind.group("cls"), + "method": m_ind.group("method"), + } active = current_test or last_failed_test if active and active in results: # Only update if the pytest path belongs to this shard's test file, @@ -116,6 +133,9 @@ def parse_log_file(filepath): if " ... [" not in line and "was successful" not in line \ and "failed!" 
not in line and "Got exit code" not in line \ and "returning 124" not in line and "FAILED CONSISTENTLY" not in line \ + and "timed out" not in line and "Timed out" not in line \ + and "operation was canceled" not in line \ + and "exceeded the maximum execution time" not in line \ and "Retrying" not in line \ and "Segmentation fault" not in line and "SIGIOT" not in line \ and "SIGSEGV" not in line and "SIGABRT" not in line \ @@ -124,6 +144,7 @@ def parse_log_file(filepath): and "Aborted (core dumped)" not in line \ and "OutOfMemoryError" not in line \ and "bad_alloc" not in line \ + and "KeyboardInterrupt" not in line \ and "stepcurrent" not in line \ and "PASSED" not in line \ and "new process" not in line: @@ -202,11 +223,45 @@ def parse_log_file(filepath): code = int(m.group("code")) if active and active in results: results[active]["exit_codes"].append(code) + elif pending_keyboard_interrupt and code in (2, 124): + inline_failures.append({ + "file": pending_keyboard_interrupt["file"], + "cls": pending_keyboard_interrupt["cls"], + "method": pending_keyboard_interrupt["method"], + "category": "TIMEOUT", + "status": "FAILED", + "reason": ( + f"{pending_keyboard_interrupt['cls']}::" + f"{pending_keyboard_interrupt['method']}" + ), + "exit_codes": str(code), + }) + pending_keyboard_interrupt = None m = RE_TIMEOUT.search(stripped) if m and active and active in results: if "TIMEOUT" not in results[active]["crashes"]: results[active]["crashes"].append("TIMEOUT") + elif m and last_individual_test: + inline_failures.append({ + "file": last_individual_test["file"], + "cls": last_individual_test["cls"], + "method": last_individual_test["method"], + "category": "TIMEOUT", + "status": "FAILED", + "reason": ( + f"{last_individual_test['cls']}::" + f"{last_individual_test['method']}" + ), + "exit_codes": "124", + }) + + if "KeyboardInterrupt" in stripped and last_individual_test: + pending_keyboard_interrupt = last_individual_test + + if RE_JOB_TIMEOUT.search(stripped) and 
active and active in results: + if "TIMEOUT" not in results[active]["crashes"]: + results[active]["crashes"].append("TIMEOUT") m = RE_FAILED_CONSISTENTLY.search(stripped) if m: @@ -251,7 +306,7 @@ def parse_log_file(filepath): if label not in results[active]["crashes"]: results[active]["crashes"].append(label) - return results, consistent_failures, flaky_tests + return results, consistent_failures, flaky_tests, inline_failures def scan_logs(logs_dir): @@ -293,7 +348,7 @@ def scan_logs(logs_dir): job_shard_str = f"{shard_num}/{job_total}" if job_total else str(shard_num) filepath = os.path.join(logs_dir, fname) - results, consistent_failures, flaky_tests = parse_log_file(filepath) + results, consistent_failures, flaky_tests, inline_failures = parse_log_file(filepath) for ft in flaky_tests: file_part = ft["file"].replace("test/", "").replace(".py", "") @@ -308,6 +363,21 @@ def scan_logs(logs_dir): "test_shard": ft["test_shard"], }) + for failure in inline_failures: + file_part = failure["file"].replace("test/", "").replace(".py", "") + all_failures.append({ + "log_file": fname, + "platform": platform, + "test_config": test_config, + "test_file": file_part, + "job_shard": job_shard_str, + "test_shard": "", + "status": failure["status"], + "category": failure["category"], + "reason": failure["reason"], + "exit_codes": failure["exit_codes"], + }) + # Record every (test_file, test_shard) observed in this log file, # including PASSED ones, so the inventory covers the full run. 
for info in results.values(): diff --git a/.github/workflows/parity.yml b/.github/workflows/parity.yml index b47049ca03f16..ae05210ae70be 100644 --- a/.github/workflows/parity.yml +++ b/.github/workflows/parity.yml @@ -38,7 +38,7 @@ on: default: false type: boolean include_logs: - description: 'Download and include CI log files (.txt) in artifact zip' + description: 'Include raw CI log files (.txt) in artifact zip; logs are still scanned for failures' required: false default: true type: boolean @@ -157,9 +157,6 @@ jobs: ARGS="$ARGS --exclude_default" fi ARGS="$ARGS --ignore_status" - if [ "${{ inputs.include_logs }}" != "true" ]; then - ARGS="$ARGS --artifacts_only" - fi if [ "${{ inputs.skip_rocm }}" = "true" ]; then ARGS="$ARGS --no_rocm" fi @@ -246,7 +243,6 @@ jobs: fi - name: Detect log-based failures (timeouts, crashes) - if: ${{ inputs.include_logs }} working-directory: .automation_scripts/pytorch-unit-test-scripts run: | FOLDER="${{ steps.folder.outputs.folder }}" @@ -262,9 +258,12 @@ jobs: FOLDER=".automation_scripts/pytorch-unit-test-scripts/${{ steps.folder.outputs.folder }}" PATHS="${FOLDER}/*.csv ${FOLDER}/*.log - ${FOLDER}/*.txt ${FOLDER}/inductor_periodic_rocm_dir/ ${FOLDER}/inductor_periodic_cuda_dir/" + if [ "${{ inputs.include_logs }}" = "true" ]; then + PATHS="${PATHS} + ${FOLDER}/*.txt" + fi if [ "${{ inputs.include_xml }}" = "true" ]; then PATHS="${PATHS} ${FOLDER}/rocm_xml/