diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 64a265db..6082f7ad 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -26,31 +26,94 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 - with: - key: cuda-toolkit-v${{ matrix.cuda }}-${{ matrix.os }} - path: | - C:\Program Files\NVIDIA GPU Computing Toolkit - ~/.cargo/registry - ~/.cargo/git - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - # CUDA environment setup - - uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0 - id: setup-cuda-toolkit + # ---- CUDA toolkit (cache + skip installer on hit) ---- + # On a cache hit we restore C:\Program Files\NVIDIA GPU Computing Toolkit + # and skip the cuda-toolkit action entirely (which otherwise spends ~7 + # min running the MSI even when the files are already on disk). We then + # replicate the small bit of env setup the action would have done — see + # the "Restore CUDA env vars" step below. + - name: Cache CUDA toolkit + id: cuda-cache + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: - cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch? 
+ path: C:\Program Files\NVIDIA GPU Computing Toolkit + # Key bumps: + # - matrix.torch.cuda — different CUDA versions get separate caches + # - 714c97b3 — pinned SHA of huggingface/cuda-toolkit; bump when the + # action changes so we re-download instead of reusing a stale tree + key: cuda-toolkit-${{ matrix.torch.cuda }}-714c97b3-${{ matrix.os }} + + - name: Install CUDA toolkit + if: steps.cuda-cache.outputs.cache-hit != 'true' + uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0 + with: + cuda: ${{ matrix.torch.cuda }} + + - name: Restore CUDA env vars (cache hit only) + # huggingface/cuda-toolkit's updatePath sets CUDA_PATH, CUDA_PATH_VX_Y, + # and prepends the toolkit's bin directory (CUDA_PATH\bin) to PATH. When we skip the action above, + # those env mutations don't happen — replicate them here so nvcc and + # the downstream builds find the toolkit. + # + # Also re-install the MSBuild integration: the CUDA installer normally + # copies the CUDA .props/.targets/.xml files from the toolkit's + # extras\visual_studio_integration\MSBuildExtensions\ into the VS + # BuildCustomizations dir. Without that, CMake's CUDA language detection + # fails with "No CUDA toolset found". Cache only restores the toolkit + # tree, so we copy the props in by hand on cache hits. + if: steps.cuda-cache.outputs.cache-hit == 'true' + shell: pwsh + run: | + $parts = "${{ matrix.torch.cuda }}".Split('.') + $major = $parts[0] + $minor = $parts[1] + $cudaPath = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$major.$minor" + "CUDA_PATH=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 + "CUDA_PATH_V${major}_${minor}=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 + "$cudaPath\bin" | Out-File $env:GITHUB_PATH -Append -Encoding utf8 + + $msBuildExt = Join-Path $cudaPath 'extras\visual_studio_integration\MSBuildExtensions' + if (-not (Test-Path $msBuildExt)) { + throw "MSBuild integration not found in cached toolkit at $msBuildExt — cache may be incomplete." 
+ } + # GitHub-hosted windows-2022 ships VS 2022 Enterprise; glob anyway so + # we don't silently break if the image switches edition. + $vsRoots = Get-ChildItem 'C:\Program Files\Microsoft Visual Studio\2022' -Directory -ErrorAction SilentlyContinue + if (-not $vsRoots) { throw "Visual Studio 2022 not found on runner." } + foreach ($vs in $vsRoots) { + $dest = Join-Path $vs.FullName 'MSBuild\Microsoft\VC\v170\BuildCustomizations' + New-Item -ItemType Directory -Force -Path $dest | Out-Null + Copy-Item -Path (Join-Path $msBuildExt '*') -Destination $dest -Force -Recurse + Write-Host "Installed CUDA MSBuild integration into $dest" + } + - name: "NVCC checks" run: nvcc -V - # Rust build environment setup + # ---- Rust toolchain + cached kernel-builder build ---- - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7 with: toolchain: stable profile: minimal override: true + # Caches the workspace target/ plus ~/.cargo/{registry,git}. Keys on + # Cargo.lock so a clean dep-graph change invalidates the artifact cache + # but unrelated edits reuse it incrementally. Cuts the kernel-builder + # build from ~8 min cold to ~30s warm. + # + # workspaces must point at the actual workspace root (root Cargo.toml + # has `[workspace] members = [..., "kernel-builder", ...]`). Cargo + # always writes target/ at the workspace root, so caching + # ./kernel-builder/target would restore to a path cargo never reads. + - name: Cache cargo + kernel-builder target + uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1 + with: + workspaces: . + shared-key: kernel-builder-${{ matrix.os }} + - name: Build kernel-builder run: ( cd kernel-builder && cargo build --release )