Skip to content
89 changes: 76 additions & 13 deletions .github/workflows/build_kernel_windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,94 @@ jobs:
runs-on: ${{ matrix.os }}

steps:
# Restore/save a cache holding the CUDA toolkit install directory together
# with cargo's registry and git checkouts, keyed on CUDA version and runner OS.
# NOTE(review): this key reads matrix.cuda, while the other CUDA steps in this
# workflow read matrix.torch.cuda — confirm which one the matrix defines.
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
key: cuda-toolkit-v${{ matrix.cuda }}-${{ matrix.os }}
path: |
C:\Program Files\NVIDIA GPU Computing Toolkit
~/.cargo/registry
~/.cargo/git

# Check out the repository so the later build steps have the workspace on disk.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

# CUDA environment setup
- uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0
id: setup-cuda-toolkit
# ---- CUDA toolkit (cache + skip installer on hit) ----
# On a cache hit we restore C:\Program Files\NVIDIA GPU Computing Toolkit
# and skip the cuda-toolkit action entirely (it otherwise spends ~7 min
# running the MSI even when the files are already on disk). The small amount
# of environment setup the action would have done is then replicated by the
# "Restore CUDA env vars (cache hit only)" step.
- name: Cache CUDA toolkit
id: cuda-cache
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions that align with torch?
path: C:\Program Files\NVIDIA GPU Computing Toolkit
# Cache key components:
# - matrix.torch.cuda — each CUDA version gets its own cache
# - 714c97b3 — short SHA of the pinned huggingface/cuda-toolkit action; bump
#   it whenever the action pin changes so we re-download instead of reusing
#   a stale tree
# - matrix.os — each runner image gets its own cache
key: cuda-toolkit-${{ matrix.torch.cuda }}-714c97b3-${{ matrix.os }}

# Run the CUDA installer only on a cache miss; on a hit the toolkit tree has
# already been restored by the cuda-cache step and this step is skipped.
- name: Install CUDA toolkit
if: steps.cuda-cache.outputs.cache-hit != 'true'
uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0
with:
cuda: ${{ matrix.torch.cuda }}

- name: Restore CUDA env vars (cache hit only)
  # huggingface/cuda-toolkit's updatePath sets CUDA_PATH, CUDA_PATH_VX_Y,
  # and prepends <CUDA_PATH>\bin to PATH. When the installer step is skipped
  # on a cache hit, those env mutations don't happen — replicate them here so
  # nvcc and the downstream builds find the toolkit.
  #
  # Also re-install the MSBuild integration: the CUDA installer normally
  # copies CUDA <ver>.{props,targets,xml} from the toolkit's
  # extras\visual_studio_integration\MSBuildExtensions\ into the VS
  # BuildCustomizations dir. Without that, CMake's CUDA language detection
  # fails with "No CUDA toolset found". The cache only restores the toolkit
  # tree, so we copy the props in by hand on cache hits.
  if: steps.cuda-cache.outputs.cache-hit == 'true'
  shell: pwsh
  env:
    # Pass the matrix value through the environment instead of interpolating
    # the expression into the script text.
    CUDA_VERSION_INPUT: ${{ matrix.torch.cuda }}
  run: |
    # Derive <major>.<minor> from the requested version and fail with a clear
    # message if either part is missing (e.g. an unquoted `12.0` in the matrix
    # is rendered as `12`), rather than building a bogus `v12.` path.
    $parts = "${env:CUDA_VERSION_INPUT}".Split('.')
    if ($parts.Count -lt 2 -or -not $parts[0] -or -not $parts[1]) {
      throw "Expected a CUDA version of the form <major>.<minor>[.<patch>], got '${env:CUDA_VERSION_INPUT}'."
    }
    $major = $parts[0]
    $minor = $parts[1]
    $cudaPath = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$major.$minor"

    # Validate the restored tree before exporting anything, so a bad cache
    # fails here and not in a later build step.
    if (-not (Test-Path $cudaPath -PathType Container)) {
      throw "CUDA toolkit directory not found at $cudaPath - the restored cache does not match the requested version."
    }
    $msBuildExt = Join-Path $cudaPath 'extras\visual_studio_integration\MSBuildExtensions'
    if (-not (Test-Path $msBuildExt)) {
      throw "MSBuild integration not found in cached toolkit at $msBuildExt — cache may be incomplete."
    }

    # Same environment the cuda-toolkit action would have exported.
    "CUDA_PATH=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
    "CUDA_PATH_V${major}_${minor}=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
    "$cudaPath\bin" | Out-File $env:GITHUB_PATH -Append -Encoding utf8

    # GitHub-hosted windows-2022 ships VS 2022 Enterprise; glob anyway so
    # we don't silently break if the image switches edition.
    $vsRoots = Get-ChildItem 'C:\Program Files\Microsoft Visual Studio\2022' -Directory -ErrorAction SilentlyContinue
    if (-not $vsRoots) { throw "Visual Studio 2022 not found on runner." }
    foreach ($vs in $vsRoots) {
      $dest = Join-Path $vs.FullName 'MSBuild\Microsoft\VC\v170\BuildCustomizations'
      New-Item -ItemType Directory -Force -Path $dest | Out-Null
      Copy-Item -Path (Join-Path $msBuildExt '*') -Destination $dest -Force -Recurse
      Write-Host "Installed CUDA MSBuild integration into $dest"
    }

# Sanity check: print the nvcc version. This step fails if nvcc cannot be
# resolved from PATH, i.e. if neither CUDA setup path above took effect.
- name: "NVCC checks"
run: nvcc -V

# Rust build environment setup
# ---- Rust toolchain + cached kernel-builder build ----
# Install the stable toolchain with the minimal profile and make it the
# override for this workspace.
# NOTE(review): actions-rs/toolchain appears to be archived/unmaintained —
# confirm, and consider migrating to a maintained toolchain action.
- uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:
toolchain: stable
profile: minimal
override: true

# Caches the workspace target/ plus ~/.cargo/{registry,git}. Keys on
# Cargo.lock so a clean dep-graph change invalidates the artifact cache
# but unrelated edits reuse it incrementally. Cuts the kernel-builder
# build from ~8 min cold to ~30s warm.
#
# workspaces must point at the actual workspace root (root Cargo.toml
# has `[workspace] members = [..., "kernel-builder", ...]`). Cargo
# always writes target/ at the workspace root, so caching
# ./kernel-builder/target would restore to a path cargo never reads.
#
# shared-key: presumably lets every job on the same runner OS share one cache
# entry instead of one per job — confirm against the rust-cache README.
- name: Cache cargo + kernel-builder target
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
with:
workspaces: .
shared-key: kernel-builder-${{ matrix.os }}

# Release build of the kernel-builder crate, run from inside its directory.
# NOTE(review): no `shell:` is set, so this relies on the runner's default
# shell accepting `( cd … && … )`; `working-directory: kernel-builder` would
# avoid depending on shell syntax — confirm before changing.
- name: Build kernel-builder
run: ( cd kernel-builder && cargo build --release )

Expand Down
Loading