From f57d466d60bb0f9f855c9090ca9a0dac6cd7d9dc Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 10:20:42 +0200 Subject: [PATCH 01/38] feat(builder): create build name directly in CMake for portability (dirty for now) --- build2cmake/src/templates/cuda/preamble.cmake | 21 +++++++++++++++++++ .../src/templates/metal/preamble.cmake | 13 ++++++++++++ build2cmake/src/templates/xpu/preamble.cmake | 15 +++++++++++++ 3 files changed, 49 insertions(+) diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index f415c6db..ca806f99 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -98,3 +98,24 @@ else() ${GPU_LANG} "${${GPU_LANG}_SUPPORTED_ARCHS}") endif() + +# Generate standardized build name +run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") +run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") +cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(SYSTEM_STRING "${HOST_ARCH}-linux") +elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(SYSTEM_STRING "${HOST_ARCH}-darwin") +elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(SYSTEM_STRING "${HOST_ARCH}-windows") +else() + set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") +endif() + +if(GPU_LANG STREQUAL "CUDA") + generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "cuda" "${CUDA_VERSION}" "${SYSTEM_STRING}") +elseif(GPU_LANG STREQUAL "HIP") + run_python(ROCM_VERSION "import torch.version; print(torch.version.hip.split('.')[0] + '.' 
+ torch.version.hip.split('.')[1])" "Failed to get ROCm version") + generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "rocm" "${ROCM_VERSION}" "${SYSTEM_STRING}") +endif() diff --git a/build2cmake/src/templates/metal/preamble.cmake b/build2cmake/src/templates/metal/preamble.cmake index c5cf4256..543a9970 100644 --- a/build2cmake/src/templates/metal/preamble.cmake +++ b/build2cmake/src/templates/metal/preamble.cmake @@ -29,3 +29,16 @@ add_compile_definitions(METAL_KERNEL) # Initialize list for Metal shader sources set(ALL_METAL_SOURCES) + +# Generate standardized build name +run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") +run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") +cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) +if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(SYSTEM_STRING "${HOST_ARCH}-darwin") +else() + message(FATAL_ERROR "Metal is only supported on macOS/Darwin") +endif() + +# Metal doesn't have a version - it's tied to the OS +generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "metal" "0" "${SYSTEM_STRING}") diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake index d171d100..314b39fa 100644 --- a/build2cmake/src/templates/xpu/preamble.cmake +++ b/build2cmake/src/templates/xpu/preamble.cmake @@ -58,3 +58,18 @@ add_compile_definitions(USE_XPU) set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';") set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;") message(STATUS "Configuring for 
Intel XPU backend using SYCL") + +# Generate standardized build name +run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") +run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") +run_python(XPU_VERSION "import intel_extension_for_pytorch; print(intel_extension_for_pytorch.__version__.split('+')[0])" "Failed to get XPU version") +cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(SYSTEM_STRING "${HOST_ARCH}-linux") +elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(SYSTEM_STRING "${HOST_ARCH}-windows") +else() + set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") +endif() + +generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "xpu" "${XPU_VERSION}" "${SYSTEM_STRING}") From 89e1bb9b7ee862ad87d0af62cb8ddc2485e8d56f Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 10:29:34 +0200 Subject: [PATCH 02/38] feat(builder): automatically generate installation structure for local / kernels builds --- build2cmake/src/templates/cuda/torch-extension.cmake | 9 ++++++++- build2cmake/src/templates/metal/torch-extension.cmake | 8 +++++++- build2cmake/src/templates/xpu/torch-extension.cmake | 6 ++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/build2cmake/src/templates/cuda/torch-extension.cmake b/build2cmake/src/templates/cuda/torch-extension.cmake index c52eb679..e2f0ecb1 100644 --- a/build2cmake/src/templates/cuda/torch-extension.cmake +++ b/build2cmake/src/templates/cuda/torch-extension.cmake @@ -9,5 +9,12 @@ define_gpu_extension_target( USE_SABI 3 WITH_SOABI) -target_link_options({{ ops_name }} PRIVATE -static-libstdc++) +if( NOT MSVC) + target_link_options({{ ops_name }} PRIVATE -static-libstdc++) +endif() +# Add kernels_install target for huggingface/kernels library layout +add_kernels_install_target({{ ops_name }} 
"{{ name }}" "${BUILD_VARIANT_NAME}") + +# Add local_install target for local development with get_local_kernel() +add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") diff --git a/build2cmake/src/templates/metal/torch-extension.cmake b/build2cmake/src/templates/metal/torch-extension.cmake index 3f81df03..e67e80fc 100644 --- a/build2cmake/src/templates/metal/torch-extension.cmake +++ b/build2cmake/src/templates/metal/torch-extension.cmake @@ -14,4 +14,10 @@ define_gpu_extension_target( # Compile Metal shaders if any were found if(ALL_METAL_SOURCES) compile_metal_shaders({{ ops_name }} "${ALL_METAL_SOURCES}") -endif() \ No newline at end of file +endif() + +# Add kernels_install target for huggingface/kernels library layout +add_kernels_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") + +# Add local_install target for local development with get_local_kernel() +add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") \ No newline at end of file diff --git a/build2cmake/src/templates/xpu/torch-extension.cmake b/build2cmake/src/templates/xpu/torch-extension.cmake index d2a95e47..2dd1e1b1 100644 --- a/build2cmake/src/templates/xpu/torch-extension.cmake +++ b/build2cmake/src/templates/xpu/torch-extension.cmake @@ -11,3 +11,9 @@ define_gpu_extension_target( # Add XPU/SYCL specific linker flags target_link_options({{ ops_name }} PRIVATE ${sycl_link_flags}) target_link_libraries({{ ops_name }} PRIVATE dnnl) + +# Add kernels_install target for huggingface/kernels library layout +add_kernels_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") + +# Add local_install target for local development with get_local_kernel() +add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") From 9995a288c0053f203af76b0e18f0ed0ce4ac9324 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 10:30:02 +0200 Subject: [PATCH 03/38] feat(builder): expose name of the operation when 
populating template --- build2cmake/src/torch/cuda.rs | 5 +++-- build2cmake/src/torch/xpu.rs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/build2cmake/src/torch/cuda.rs b/build2cmake/src/torch/cuda.rs index c58db682..206e75ab 100644 --- a/build2cmake/src/torch/cuda.rs +++ b/build2cmake/src/torch/cuda.rs @@ -184,7 +184,7 @@ fn write_cmake( render_kernel(env, kernel_name, kernel, cmake_writer)?; } - render_extension(env, ops_name, cmake_writer)?; + render_extension(env, name, ops_name, cmake_writer)?; Ok(()) } @@ -351,11 +351,12 @@ pub fn render_kernel( Ok(()) } -pub fn render_extension(env: &Environment, ops_name: &str, write: &mut impl Write) -> Result<()> { +pub fn render_extension(env: &Environment, name: &str, ops_name: &str, write: &mut impl Write) -> Result<()> { env.get_template("cuda/torch-extension.cmake") .wrap_err("Cannot get Torch extension template")? .render_to_write( context! { + name => name, ops_name => ops_name, }, &mut *write, diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs index 1eaafb0f..1da3ef2e 100644 --- a/build2cmake/src/torch/xpu.rs +++ b/build2cmake/src/torch/xpu.rs @@ -159,7 +159,7 @@ fn write_cmake( render_kernel(env, kernel_name, kernel, cmake_writer)?; } - render_extension(env, ops_name, cmake_writer)?; + render_extension(env, name, ops_name, cmake_writer)?; Ok(()) } @@ -250,11 +250,12 @@ pub fn render_kernel( Ok(()) } -pub fn render_extension(env: &Environment, ops_name: &str, write: &mut impl Write) -> Result<()> { +pub fn render_extension(env: &Environment, name: &str, ops_name: &str, write: &mut impl Write) -> Result<()> { env.get_template("xpu/torch-extension.cmake") .wrap_err("Cannot get Torch extension template")? .render_to_write( context! 
{ + name => name, ops_name => ops_name, }, &mut *write, From a396f92d2f28fabb36543f30f507bd8d62dc15a6 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 10:31:02 +0200 Subject: [PATCH 04/38] feat(builder): add missing utils.cmake functions - oops --- build2cmake/src/templates/utils.cmake | 193 ++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) diff --git a/build2cmake/src/templates/utils.cmake b/build2cmake/src/templates/utils.cmake index d4d4cef2..920b5969 100644 --- a/build2cmake/src/templates/utils.cmake +++ b/build2cmake/src/templates/utils.cmake @@ -555,3 +555,196 @@ function (define_gpu_extension_target GPU_MOD_NAME) install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME}) endfunction() + +# +# Generate a standardized build variant name following the pattern: +# torch<version>-<abi>-<compute>-<system> (or torch<version>-<compute>-<system> for aarch64-darwin) +# +# Arguments: +# OUT_BUILD_NAME - Output variable name +# TORCH_VERSION - PyTorch version (e.g., "2.7.1") +# CXX11_ABI - Whether C++11 ABI is enabled (TRUE/FALSE) +# COMPUTE_FRAMEWORK - One of: cuda, rocm, metal, xpu +# COMPUTE_VERSION - Version of compute framework (e.g., "12.4" for CUDA, "6.0" for ROCm) +# SYSTEM - System identifier (e.g., "x86_64-linux", "aarch64-darwin") +# +# Example output: torch27-cxx11-cu124-x86_64-linux +# +function(generate_build_name OUT_BUILD_NAME TORCH_VERSION CXX11_ABI COMPUTE_FRAMEWORK COMPUTE_VERSION SYSTEM) + # Flatten version by removing dots and padding to 2 components + string(REPLACE "." 
";" VERSION_LIST "${TORCH_VERSION}") + list(LENGTH VERSION_LIST VERSION_COMPONENTS) + + # Pad to at least 2 components + if(VERSION_COMPONENTS LESS 2) + list(APPEND VERSION_LIST "0") + endif() + + # Take first 2 components and join without dots + list(GET VERSION_LIST 0 MAJOR) + list(GET VERSION_LIST 1 MINOR) + set(FLATTENED_TORCH "${MAJOR}${MINOR}") + + # Determine ABI string (skip for aarch64-darwin) + if(SYSTEM STREQUAL "aarch64-darwin") + set(ABI_STRING "") + else() + if(CXX11_ABI) + set(ABI_STRING "cxx11") + else() + set(ABI_STRING "cxx98") + endif() + endif() + + # Generate compute string + if(COMPUTE_FRAMEWORK STREQUAL "cuda") + # Flatten CUDA version (e.g., "12.4" -> "124") + string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "cu${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "cu${COMPUTE_MAJOR}0") + endif() + elseif(COMPUTE_FRAMEWORK STREQUAL "rocm") + # Flatten ROCm version (e.g., "6.0" -> "60") + string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}0") + endif() + elseif(COMPUTE_FRAMEWORK STREQUAL "metal") + set(COMPUTE_STRING "metal") + elseif(COMPUTE_FRAMEWORK STREQUAL "xpu") + # Flatten XPU version (e.g., "2025.2" -> "20252") + string(REPLACE "." 
";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}0") + endif() + else() + message(FATAL_ERROR "Unknown compute framework: ${COMPUTE_FRAMEWORK}") + endif() + + # Assemble the final build name + if(ABI_STRING STREQUAL "") + set(BUILD_NAME "torch${FLATTENED_TORCH}-${COMPUTE_STRING}-${SYSTEM}") + else() + set(BUILD_NAME "torch${FLATTENED_TORCH}-${ABI_STRING}-${COMPUTE_STRING}-${SYSTEM}") + endif() + + set(${OUT_BUILD_NAME} "${BUILD_NAME}" PARENT_SCOPE) + message(STATUS "Generated build name: ${BUILD_NAME}") +endfunction() + +# +# Create a custom install target for the huggingface/kernels library layout. +# This installs the extension into a directory structure suitable for kernel hub discovery: +# <install_prefix>/<build_variant_name>/<package_name>/ +# +# Arguments: +# TARGET_NAME - Name of the target to create the install rule for +# PACKAGE_NAME - Python package name (e.g., "activation") +# BUILD_VARIANT_NAME - Build variant name (e.g., "torch27-cxx11-cu124-x86_64-linux") +# INSTALL_PREFIX - Base installation directory (defaults to CMAKE_INSTALL_PREFIX) +# +function(add_kernels_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) + set(oneValueArgs INSTALL_PREFIX) + cmake_parse_arguments(ARG "" "${oneValueArgs}" "" ${ARGN}) + + if(NOT ARG_INSTALL_PREFIX) + set(ARG_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + endif() + + # Create the kernels_install target if it doesn't exist + if(NOT TARGET kernels_install) + add_custom_target(kernels_install ALL + COMMENT "Installing all kernels to hub-compatible layout" + VERBATIM) + endif() + + # Create a custom target for this specific kernel + set(KERNEL_INSTALL_TARGET 
"${TARGET_NAME}_kernel_install") + set(KERNEL_INSTALL_DIR 
"${ARG_INSTALL_PREFIX}/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") + + add_custom_target(${KERNEL_INSTALL_TARGET} ALL + COMMAND ${CMAKE_COMMAND} -E make_directory "${KERNEL_INSTALL_DIR}" + COMMAND ${CMAKE_COMMAND} -E copy $ "${KERNEL_INSTALL_DIR}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}" + "${KERNEL_INSTALL_DIR}/" + DEPENDS ${TARGET_NAME} + COMMENT "Installing ${TARGET_NAME} to ${KERNEL_INSTALL_DIR}" + VERBATIM) + + # Make kernels_install depend on this specific kernel's install + add_dependencies(kernels_install ${KERNEL_INSTALL_TARGET}) + + # Set folder for IDE organization + if(MSVC OR XCODE) + set_target_properties(${KERNEL_INSTALL_TARGET} PROPERTIES FOLDER "Install") + endif() + + message(STATUS "Added kernels_install target for ${TARGET_NAME} -> ${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") +endfunction() + +# +# Add install rules for local development with huggingface/kernels. +# This installs the extension into the layout expected by get_local_kernel(): +# ${CMAKE_SOURCE_DIR}/build/<build_variant_name>/<package_name>/ +# +# This allows developers to use get_local_kernel() from the kernels library to load +# locally built kernels without needing to publish to the hub. +# +# This defines a custom "local_install" build target that copies the files; it +# does not use the CMake install() command or the default "install" target. 
+# +# Arguments: +# TARGET_NAME - Name of the target to create the install rule for +# PACKAGE_NAME - Python package name (e.g., "activation") +# BUILD_VARIANT_NAME - Build variant name (e.g., "torch271-cxx11-cu124-x86_64-linux") +# +function(add_local_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) + # Define your local, folder based, installation directory + set(LOCAL_INSTALL_DIR "${CMAKE_SOURCE_DIR}/build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") + + # Glob Python files at configure time + file(GLOB PYTHON_FILES "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}/*.py") + + # Create a custom target for local installation + add_custom_target(local_install + COMMENT "Installing files to local directory..." + ) + + # Add custom commands to copy files + add_custom_command(TARGET local_install POST_BUILD + # Copy the shared library + COMMAND ${CMAKE_COMMAND} -E copy_if_different + $ + ${LOCAL_INSTALL_DIR}/ + + # Copy each Python file + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${PYTHON_FILES} + ${LOCAL_INSTALL_DIR}/ + + COMMENT "Copying shared library and Python files to ${LOCAL_INSTALL_DIR}" + COMMAND_EXPAND_LISTS + ) + + file(MAKE_DIRECTORY ${LOCAL_INSTALL_DIR}) + message(STATUS "Added install rules for ${TARGET_NAME} -> build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") +endfunction() From 90eb189fee875c6011daff5952681fe4a0c7e4d1 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 10:31:39 +0200 Subject: [PATCH 05/38] feat(builder): introduce Windows, PowerShell based, kbuilder.ps1 to build kernels on Windows hosts --- build-driver/kbuilder.ps1 | 592 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 592 insertions(+) create mode 100644 build-driver/kbuilder.ps1 diff --git a/build-driver/kbuilder.ps1 b/build-driver/kbuilder.ps1 new file mode 100644 index 00000000..7f0fc0bb --- /dev/null +++ b/build-driver/kbuilder.ps1 @@ -0,0 +1,592 @@ +#Requires -Version 7.0 + +<# +.SYNOPSIS + Kernel Builder - Modern PowerShell wrapper for build2cmake 
tool + +.DESCRIPTION + This script provides a modular interface to build2cmake for generating CMake + structures from build.toml configuration files. Supports multiple backends + including CUDA, ROCm, Metal, and XPU. + +.PARAMETER SourceFolder + Path to the folder containing build.toml file + +.PARAMETER TargetFolder + Optional destination folder for generated CMake files (defaults to SourceFolder) + +.PARAMETER Backend + Target backend: cuda, rocm, metal, xpu, or universal + +.PARAMETER Build2CmakePath + Path to build2cmake executable (auto-detected if not specified) + +.PARAMETER Force + Force overwrite existing files without prompting + +.PARAMETER OpsId + Optional unique identifier suffixed to kernel names (e.g., Git SHA) + +.PARAMETER Clean + Remove generated artifacts instead of building + +.PARAMETER DryRun + Show what would be done without executing (clean mode only) + +.PARAMETER Validate + Validate build.toml without generating files + +.PARAMETER Build + Build the project after generating CMake files + +.PARAMETER BuildConfig + CMake build configuration (Debug or Release, defaults to Release) + +.PARAMETER ArchList + GPU architectures to build for (backend-agnostic). 
+ For CUDA: e.g., "7.5 8.6" or "Turing Ampere" + For ROCm: e.g., "gfx906;gfx908;gfx90a" + For XPU: Currently not supported via environment variable + +.PARAMETER LocalInstall + Run the local_install target after building (installs to build/<build_variant>/<package_name>/ for local development) + +.PARAMETER KernelsInstall + Run kernels_install target after building (installs to CMAKE_INSTALL_PREFIX/<build_variant>/<package_name>/) + +.PARAMETER InstallPrefix + Installation prefix for kernels_install target (defaults to CMAKE_INSTALL_PREFIX) + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Force + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -TargetFolder ./build/relu -OpsId abc123 + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Clean -Force + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -BuildConfig Debug + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -ArchList "7.5 8.6" -Build + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend rocm -ArchList "gfx906;gfx908" -Build + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -LocalInstall + +.EXAMPLE + .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -KernelsInstall -InstallPrefix "C:\kernels" +#> + +[CmdletBinding(DefaultParameterSetName = 'Generate')] +param( + [Parameter(Mandatory = $true, Position = 0, HelpMessage = "Folder containing build.toml")] + [ValidateScript({ Test-Path $_ -PathType Container })] + [string]$SourceFolder, + + [Parameter(ParameterSetName = 'Generate')] + [ValidateScript({ + if ($_ -and !(Test-Path $_ -PathType Container)) { + throw "Target folder does not exist: $_" + } + $true + })] + [string]$TargetFolder, + + [Parameter(ParameterSetName = 'Generate')] + [ValidateSet('cuda', 'rocm', 'metal', 'xpu', 'universal')] + [string]$Backend, + + [Parameter()] + 
[string]$Build2CmakePath, + + [Parameter(ParameterSetName = 'Generate')] + 
[switch]$Force, + + [Parameter(ParameterSetName = 'Generate')] + [string]$OpsId, + + [Parameter(ParameterSetName = 'Generate')] + [switch]$Build, + + [Parameter(ParameterSetName = 'Generate')] + [ValidateSet('Debug', 'Release')] + [string]$BuildConfig = 'Release', + + [Parameter(ParameterSetName = 'Generate')] + [string]$ArchList, + + [Parameter(ParameterSetName = 'Generate')] + [switch]$LocalInstall, + + [Parameter(ParameterSetName = 'Generate')] + [switch]$KernelsInstall, + + [Parameter(ParameterSetName = 'Generate')] + [string]$InstallPrefix, + + [Parameter(ParameterSetName = 'Clean', Mandatory = $true)] + [switch]$Clean, + + [Parameter(ParameterSetName = 'Clean')] + [switch]$DryRun, + + [Parameter(ParameterSetName = 'Validate', Mandatory = $true)] + [switch]$Validate +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +#region Helper Functions + +function Write-Status { + param([string]$Message, [string]$Type = 'Info') + + $colors = @{ + 'Info' = 'Cyan' + 'Success' = 'Green' + 'Warning' = 'Yellow' + 'Error' = 'Red' + } + + $prefix = switch ($Type) { + 'Info' { '[*]' } + 'Success' { '[+]' } + 'Warning' { '[!]' } + 'Error' { '[X]' } + } + + Write-Host "$prefix $Message" -ForegroundColor $colors[$Type] +} + +function Find-Build2Cmake { + <# + .SYNOPSIS + Locates build2cmake executable in common locations + #> + + # Check if provided path is valid + if ($Build2CmakePath) { + if (Test-Path $Build2CmakePath -PathType Leaf) { + return $Build2CmakePath + } + throw "Specified build2cmake path not found: $Build2CmakePath" + } + + # Search common locations + $searchPaths = @( + (Join-Path $PSScriptRoot '..' 'build2cmake' 'target' 'release' 'build2cmake.exe'), + (Join-Path $PSScriptRoot '..' 
'build2cmake' 'target' 'debug' 'build2cmake.exe'), + 'build2cmake.exe', + 'build2cmake' + ) + + foreach ($path in $searchPaths) { + $resolved = if ([System.IO.Path]::IsPathRooted($path)) { + $path + } else { + Join-Path $PWD $path + } + + if (Test-Path $resolved -PathType Leaf) { + Write-Status "Found build2cmake at: $resolved" -Type Info + return $resolved + } + } + + # Try system PATH + $cmd = Get-Command build2cmake -ErrorAction SilentlyContinue + if ($cmd) { + Write-Status "Using build2cmake from PATH: $($cmd.Source)" -Type Info + return $cmd.Source + } + + throw "build2cmake executable not found. Please build it or specify -Build2CmakePath" +} + +function Get-BuildTomlPath { + param([string]$Folder) + + $buildTomlPath = Join-Path $Folder 'build.toml' + + if (!(Test-Path $buildTomlPath -PathType Leaf)) { + throw "build.toml not found in folder: $Folder" + } + + return $buildTomlPath +} + +function Invoke-Build2Cmake { + param( + [string]$Build2CmakeExe, + [string[]]$Arguments + ) + + Write-Status "Executing: $Build2CmakeExe $($Arguments -join ' ')" -Type Info + + & $Build2CmakeExe @Arguments + + if ($LASTEXITCODE -ne 0) { + throw "build2cmake failed with exit code $LASTEXITCODE" + } +} + +function Initialize-VSEnvironment { + <# + .SYNOPSIS + Initializes Visual Studio build environment for MSBuild/CMake + #> + + Write-Status "Initializing Visual Studio environment..." -Type Info + + # Check if already in VS environment + if ($env:VSINSTALLDIR) { + Write-Status "Visual Studio environment already initialized" -Type Info + return + } + + # Search for vswhere.exe + $vswherePath = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" + if (!(Test-Path $vswherePath)) { + throw "vswhere.exe not found. Please install Visual Studio 2017 or later." 
+ } + + # Find latest VS installation + $vsPath = & $vswherePath -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath + if (!$vsPath) { + throw "Visual Studio with C++ tools not found. Please install Visual Studio with C++ workload." + } + + Write-Status "Found Visual Studio at: $vsPath" -Type Info + + # Find vcvarsall.bat + $vcvarsPath = Join-Path $vsPath "VC\Auxiliary\Build\vcvarsall.bat" + if (!(Test-Path $vcvarsPath)) { + throw "vcvarsall.bat not found at expected location: $vcvarsPath" + } + + # Execute vcvarsall and capture environment variables + $tempFile = [System.IO.Path]::GetTempFileName() + + # Run vcvarsall.bat and export environment to temp file + cmd /c "`"$vcvarsPath`" x64 && set > `"$tempFile`"" + + if ($LASTEXITCODE -ne 0) { + Remove-Item $tempFile -ErrorAction SilentlyContinue + throw "Failed to initialize Visual Studio environment" + } + + # Parse and apply environment variables + Get-Content $tempFile | ForEach-Object { + if ($_ -match '^([^=]+)=(.*)$') { + $name = $matches[1] + $value = $matches[2] + Set-Item -Path "env:\$name" -Value $value + } + } + + Remove-Item $tempFile -ErrorAction SilentlyContinue + + Write-Status "Visual Studio environment initialized successfully" -Type Success +} + +function Invoke-CMakeBuild { + param( + [string]$SourcePath, + [string]$BuildConfig, + [bool]$RunLocalInstall = $false, + [bool]$RunKernelsInstall = $false, + [string]$InstallPrefix = $null + ) + + Write-Status "Building project with CMake..." -Type Info + Write-Status "Configuration: $BuildConfig" -Type Info + + # Ensure VS environment is initialized + Initialize-VSEnvironment + + # Create build directory + $buildDir = Join-Path $SourcePath "build" + if (!(Test-Path $buildDir)) { + New-Item -ItemType Directory -Path $buildDir | Out-Null + Write-Status "Created build directory: $buildDir" -Type Info + } + + # Configure with CMake + Write-Status "Configuring CMake project..." 
-Type Info + Push-Location $buildDir + try { + $configureArgs = @("..", "-G", "Visual Studio 17 2022", "-A", "x64") + + # Add install prefix if specified for kernels_install + if (($RunKernelsInstall -or $LocalInstall) -and $InstallPrefix) { + $configureArgs += "-DCMAKE_INSTALL_PREFIX=$InstallPrefix" + Write-Status "Setting CMAKE_INSTALL_PREFIX=$InstallPrefix" -Type Info + } + + cmake @configureArgs + + if ($LASTEXITCODE -ne 0) { + throw "CMake configuration failed with exit code $LASTEXITCODE" + } + + # Build with CMake + Write-Status "Building project..." -Type Info + cmake --build . --config $BuildConfig + + if ($LASTEXITCODE -ne 0) { + throw "CMake build failed with exit code $LASTEXITCODE" + } + + Write-Status "Build completed successfully!" -Type Success + + # Run install target if requested (installs to local development layout) + if ($RunLocalInstall) { + Write-Status "Running install target (local development layout)..." -Type Info + cmake --build . --target local_install --config $BuildConfig + + if ($LASTEXITCODE -ne 0) { + throw "Install target failed with exit code $LASTEXITCODE" + } + + Write-Status "Local install completed successfully!" -Type Success + } + + # Run kernels_install target if requested + if ($RunKernelsInstall) { + Write-Status "Running kernels_install target..." -Type Info + cmake --build . --config $BuildConfig --target kernels_install + + if ($LASTEXITCODE -ne 0) { + throw "kernels_install target failed with exit code $LASTEXITCODE" + } + + Write-Status "Kernels install completed successfully!" -Type Success + } + } + finally { + Pop-Location + } +} + +#endregion + +#region Backend-Specific Functions + +function Invoke-CudaBackend { + param( + [string]$Build2CmakeExe, + [string]$BuildToml, + [string]$Target, + [hashtable]$Options + ) + + Write-Status "Generating CUDA backend..." 
-Type Info + + $args = @('generate-torch', $BuildToml) + + if ($Target) { $args += $Target } + if ($Options.Force) { $args += '--force' } + if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + $args += '--backend', 'cuda' + + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args +} + +function Invoke-RocmBackend { + param( + [string]$Build2CmakeExe, + [string]$BuildToml, + [string]$Target, + [hashtable]$Options + ) + + Write-Status "Generating ROCm backend..." -Type Info + + $args = @('generate-torch', $BuildToml) + + if ($Target) { $args += $Target } + if ($Options.Force) { $args += '--force' } + if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + $args += '--backend', 'rocm' + + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args +} + +function Invoke-MetalBackend { + param( + [string]$Build2CmakeExe, + [string]$BuildToml, + [string]$Target, + [hashtable]$Options + ) + + Write-Status "Generating Metal backend..." -Type Info + + $args = @('generate-torch', $BuildToml) + + if ($Target) { $args += $Target } + if ($Options.Force) { $args += '--force' } + if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + $args += '--backend', 'metal' + + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args +} + +function Invoke-XpuBackend { + param( + [string]$Build2CmakeExe, + [string]$BuildToml, + [string]$Target, + [hashtable]$Options + ) + + Write-Status "Generating XPU backend..." -Type Info + + $args = @('generate-torch', $BuildToml) + + if ($Target) { $args += $Target } + if ($Options.Force) { $args += '--force' } + if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + $args += '--backend', 'xpu' + + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args +} + +function Invoke-UniversalBackend { + param( + [string]$Build2CmakeExe, + [string]$BuildToml, + [string]$Target, + [hashtable]$Options + ) + + Write-Status "Generating Universal backend..." 
-Type Info + + $args = @('generate-torch', $BuildToml) + + if ($Target) { $args += $Target } + if ($Options.Force) { $args += '--force' } + if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args +} + +#endregion + +#region Main Logic + +try { + # Resolve paths + $SourceFolder = Resolve-Path $SourceFolder -ErrorAction Stop + $buildTomlPath = Get-BuildTomlPath -Folder $SourceFolder + $build2cmakeExe = Find-Build2Cmake + + # Validate mode + if ($Validate) { + Write-Status "Validating $buildTomlPath..." -Type Info + Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments @('validate', $buildTomlPath) + Write-Status "Validation successful!" -Type Success + exit 0 + } + + # Clean mode + if ($Clean) { + Write-Status "Cleaning generated artifacts..." -Type Warning + + $args = @('clean', $buildTomlPath) + if ($TargetFolder) { $args += $TargetFolder } + if ($DryRun) { $args += '--dry-run' } + if ($Force) { $args += '--force' } + if ($OpsId) { $args += '--ops-id', $OpsId } + + Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $args + Write-Status "Clean completed!" -Type Success + exit 0 + } + + # Generate mode + # Check for Metal backend on Windows + if ($Backend -and $Backend.ToLower() -eq 'metal') { + throw "Metal backend is not supported on Windows. Metal is only available on macOS." 
+ } + + $options = @{ + Force = $Force.IsPresent + OpsId = $OpsId + } + + # Set architecture environment variables if ArchList is provided + if ($ArchList -and $Backend) { + switch ($Backend.ToLower()) { + 'cuda' { + $env:TORCH_CUDA_ARCH_LIST = $ArchList + Write-Status "Set TORCH_CUDA_ARCH_LIST=$ArchList" -Type Info + } + 'rocm' { + $env:PYTORCH_ROCM_ARCH = $ArchList + Write-Status "Set PYTORCH_ROCM_ARCH=$ArchList" -Type Info + } + 'xpu' { + Write-Status "ArchList not supported for XPU backend (no standard environment variable)" -Type Warning + } + 'metal' { + Write-Status "ArchList not applicable for Metal backend" -Type Warning + } + 'universal' { + Write-Status "ArchList not applicable for Universal backend" -Type Warning + } + } + } + + # Determine backend strategy + if ($Backend) { + # Explicit backend specified + $targetPath = if ($TargetFolder) { Resolve-Path $TargetFolder } else { $null } + + switch ($Backend.ToLower()) { + 'cuda' { Invoke-CudaBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } + 'rocm' { Invoke-RocmBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } + 'metal' { Invoke-MetalBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } + 'xpu' { Invoke-XpuBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } + 'universal' { Invoke-UniversalBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } + } + } else { + # Auto-detect backend from build.toml + Write-Status "Auto-detecting backend from build.toml..." 
-Type Info + + $args = @('generate-torch', $buildTomlPath) + if ($TargetFolder) { $args += (Resolve-Path $TargetFolder) } + if ($Force) { $args += '--force' } + if ($OpsId) { $args += '--ops-id', $OpsId } + + Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $args + } + + Write-Status "Generation completed successfully!" -Type Success + + # Build if requested + if ($Build) { + $buildPath = if ($TargetFolder) { $TargetFolder } else { $SourceFolder } + Invoke-CMakeBuild ` + -SourcePath $buildPath ` + -BuildConfig $BuildConfig ` + -RunLocalInstall $LocalInstall.IsPresent ` + -RunKernelsInstall $KernelsInstall.IsPresent ` + -InstallPrefix $InstallPrefix + } + +} catch { + Write-Status "Error: $_" -Type Error + exit 1 +} + +#endregion \ No newline at end of file From 2f8969f492c3db5edca5b08dd814e3c766c8f52f Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 14:37:01 +0200 Subject: [PATCH 06/38] feat(builder): attempt to not rely on intel_pytorch_extension to gather XPU version info --- build2cmake/src/templates/xpu/preamble.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake index 314b39fa..fa920266 100644 --- a/build2cmake/src/templates/xpu/preamble.cmake +++ b/build2cmake/src/templates/xpu/preamble.cmake @@ -62,7 +62,7 @@ message(STATUS "Configuring for Intel XPU backend using SYCL") # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") -run_python(XPU_VERSION "import intel_extension_for_pytorch; print(intel_extension_for_pytorch.__version__.split('+')[0])" "Failed to get XPU version") +run_python(XPU_VERSION "import torch; print(torch.xpu.get_device_capability['version'].split('+')[0])" "Failed to get XPU 
version") cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(SYSTEM_STRING "${HOST_ARCH}-linux") From 4db1bc6b8f5306f97f07aef36e322ad3c41e8d2e Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 9 Oct 2025 14:46:53 +0200 Subject: [PATCH 07/38] feat(builder): missing function call on get_device_capability ... --- build2cmake/src/templates/xpu/preamble.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake index fa920266..c8f56976 100644 --- a/build2cmake/src/templates/xpu/preamble.cmake +++ b/build2cmake/src/templates/xpu/preamble.cmake @@ -62,7 +62,7 @@ message(STATUS "Configuring for Intel XPU backend using SYCL") # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") -run_python(XPU_VERSION "import torch; print(torch.xpu.get_device_capability['version'].split('+')[0])" "Failed to get XPU version") +run_python(XPU_VERSION "import torch; print(torch.xpu.get_device_capability()['version'].split('+')[0])" "Failed to get XPU version") cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(SYSTEM_STRING "${HOST_ARCH}-linux") From 93ad5095cb307892f7334f3c56bca68dfe29d5ea Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 11:23:36 +0200 Subject: [PATCH 08/38] feat(gha): attempt to enable building windows based kernels --- .../workflows/build_kernel_windows.yaml.yml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/build_kernel_windows.yaml.yml diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml new file 
mode 100644 index 00000000..03abf5e3 --- /dev/null +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -0,0 +1,26 @@ +name: "Build and test kernel - Windows" +on: + push: + branches: [main] + pull_request: + branches: [main] + types: [opened, synchronize, reopened] # trigger on PRs + workflow_dispatch: + +jobs: + build: + name: Build kernel + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + - uses: Jimver/cuda-toolkit@v0.2.28 + with: + cuda: '13.0.1' + + - name: "NVCC checks" + run: nvcc -V + + - name: Build activation kernel + run: ( cd examples/activation && .\build-driver\kbuilder.ps1 -SourceFolder . -BuildConfig Release -Backend cuda -Build -Force ) + - name: Copy activation kernel + run: cp -rL examples/activation/result activation-kernel \ No newline at end of file From b0acd2f260be900019e58507ded1747fc3a50505 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 11:32:13 +0200 Subject: [PATCH 09/38] feat(gha): do not specify cuda version for now --- .github/workflows/build_kernel_windows.yaml.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 03abf5e3..1cbb45e1 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -12,10 +12,9 @@ jobs: name: Build kernel runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: Jimver/cuda-toolkit@v0.2.28 - with: - cuda: '13.0.1' + id: setup-cuda-toolkit - name: "NVCC checks" run: nvcc -V From ae7ab63f25e17e1cff57dc0ca26081f783b54603 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 11:36:13 +0200 Subject: [PATCH 10/38] feat(gha): disable local cache --- .github/workflows/build_kernel_windows.yaml.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_kernel_windows.yaml.yml 
b/.github/workflows/build_kernel_windows.yaml.yml index 1cbb45e1..732e166f 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -15,6 +15,8 @@ jobs: - uses: actions/checkout@v5 - uses: Jimver/cuda-toolkit@v0.2.28 id: setup-cuda-toolkit + with: + use-local-cache: 'false' - name: "NVCC checks" run: nvcc -V From 61c02876139b41c67931ec0f8381576e061eac31 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 11:40:26 +0200 Subject: [PATCH 11/38] feat(gha): target windows-2022 --- .github/workflows/build_kernel_windows.yaml.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 732e166f..47217ae5 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -10,13 +10,13 @@ on: jobs: build: name: Build kernel - runs-on: windows-latest + runs-on: windows-2022 steps: - uses: actions/checkout@v5 - uses: Jimver/cuda-toolkit@v0.2.28 id: setup-cuda-toolkit with: - use-local-cache: 'false' + cuda: '13.0.1' - name: "NVCC checks" run: nvcc -V From 4cbaacadc64c73f6de49178fef2c315fe16a3b60 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 12:22:25 +0200 Subject: [PATCH 12/38] feat(gha): attempt to downgrade the cuda-toolkit action to a one "working" --- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 47217ae5..ba048e4a 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -13,7 +13,7 @@ jobs: runs-on: windows-2022 steps: - uses: actions/checkout@v5 - - uses: Jimver/cuda-toolkit@v0.2.28 + - uses: Jimver/cuda-toolkit@v0.2.24 id: setup-cuda-toolkit with: cuda: '13.0.1' From 
acf273bd5307a42020a11660f35ef07c2d42ef76 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 12:24:31 +0200 Subject: [PATCH 13/38] feat(gha): once more? --- .github/workflows/build_kernel_windows.yaml.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index ba048e4a..4d8cbe66 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -13,10 +13,11 @@ jobs: runs-on: windows-2022 steps: - uses: actions/checkout@v5 - - uses: Jimver/cuda-toolkit@v0.2.24 + - uses: N-Storm/cuda-toolkit@v0.2.27m id: setup-cuda-toolkit with: cuda: '13.0.1' + use-github-cache: false - name: "NVCC checks" run: nvcc -V From 1b643e6157a126e0f7d63747b22fc4edb94371e9 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 12:26:06 +0200 Subject: [PATCH 14/38] feat(gha): use 13.0.0 with this one --- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 4d8cbe66..3280ba6e 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -16,7 +16,7 @@ jobs: - uses: N-Storm/cuda-toolkit@v0.2.27m id: setup-cuda-toolkit with: - cuda: '13.0.1' + cuda: '13.0.0' use-github-cache: false - name: "NVCC checks" From 91c29a2f37b67f97b358f92a48f4a2ca47aa8d86 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 12:56:39 +0200 Subject: [PATCH 15/38] feat(gha): use correct path towards kbuilder.ps1 --- .github/workflows/build_kernel_windows.yaml.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 3280ba6e..2c417e14 100644 --- 
a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -23,6 +23,6 @@ jobs: run: nvcc -V - name: Build activation kernel - run: ( cd examples/activation && .\build-driver\kbuilder.ps1 -SourceFolder . -BuildConfig Release -Backend cuda -Build -Force ) + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) - name: Copy activation kernel - run: cp -rL examples/activation/result activation-kernel \ No newline at end of file + run: cp -rL examples/activation/build activation-kernel \ No newline at end of file From 360f1eb902d4dee2731efed6010e484d2f009715 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 13:27:19 +0200 Subject: [PATCH 16/38] feat(gha): build build2cmake and specify path to it for kbuilder --- .../workflows/build_kernel_windows.yaml.yml | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 2c417e14..a7e31012 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -9,20 +9,41 @@ on: jobs: build: + strategy: + matrix: + os: [ windows-2022 ] + version: [ '13.0.0' ] + name: Build kernel - runs-on: windows-2022 + runs-on: ${{ matrix.os }} + steps: + - uses: actions/cache@v4 + with: + key: cuda-toolkit-${{ matrix.version }}-${{ matrix.os }} + path: | + C:\Program Files\NVIDIA GPU Computing Toolkit + ~/.cargo/registry + ~/.cargo/git + - uses: actions/checkout@v5 - uses: N-Storm/cuda-toolkit@v0.2.27m id: setup-cuda-toolkit with: - cuda: '13.0.0' - use-github-cache: false - + cuda: ${{ matrix.version }} - name: "NVCC checks" run: nvcc -V + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Build build2cmake + run: ( cd build2cmake && cargo build --release ) + - name: Build 
activation kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) + run: ( .\build-driver\kbuilder.ps1 -Build2CmakePath=build2cmake/target/release -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) - name: Copy activation kernel run: cp -rL examples/activation/build activation-kernel \ No newline at end of file From ce3d2b9608a474b397a58812aab71118d71b8a74 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 14:02:05 +0200 Subject: [PATCH 17/38] feat(gha): again --- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index a7e31012..dd66d2ad 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -44,6 +44,6 @@ jobs: run: ( cd build2cmake && cargo build --release ) - name: Build activation kernel - run: ( .\build-driver\kbuilder.ps1 -Build2CmakePath=build2cmake/target/release -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) + run: ( .\build-driver\kbuilder.ps1 -Build2CmakePath build2cmake/target/release -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) - name: Copy activation kernel run: cp -rL examples/activation/build activation-kernel \ No newline at end of file From 51b061c28562469d01f9b19b4bbc959466d31c52 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 14:16:34 +0200 Subject: [PATCH 18/38] feat(gha): let kbuilder discover build2cmake path from root --- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index dd66d2ad..1e439505 100644 --- 
a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -44,6 +44,6 @@ jobs: run: ( cd build2cmake && cargo build --release ) - name: Build activation kernel - run: ( .\build-driver\kbuilder.ps1 -Build2CmakePath build2cmake/target/release -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) - name: Copy activation kernel run: cp -rL examples/activation/build activation-kernel \ No newline at end of file From a54c4c8b57e6412f647b7f15ac243c8e2761b8e7 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 14:40:08 +0200 Subject: [PATCH 19/38] feat(gha): setup python --- .../workflows/build_kernel_windows.yaml.yml | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 1e439505..bebce292 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ windows-2022 ] - version: [ '13.0.0' ] + python: [ '3.12', '3.13' ] name: Build kernel runs-on: ${{ matrix.os }} @@ -27,13 +27,16 @@ jobs: ~/.cargo/git - uses: actions/checkout@v5 + + # CUDA environment setup - uses: N-Storm/cuda-toolkit@v0.2.27m id: setup-cuda-toolkit with: - cuda: ${{ matrix.version }} + cuda: '12.9.1' # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch? 
- name: "NVCC checks" run: nvcc -V + # Rust build environment setup - uses: actions-rs/toolchain@v1 with: toolchain: stable @@ -43,7 +46,16 @@ jobs: - name: Build build2cmake run: ( cd build2cmake && cargo build --release ) + # Python environment setup + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python }} + cache: 'pip' + + - name: Install PyTorch + run: pip install --index-url https://download.pytorch.org/whl/cu129 + - name: Build activation kernel run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) - - name: Copy activation kernel - run: cp -rL examples/activation/build activation-kernel \ No newline at end of file +# - name: Copy activation kernel +# run: cp -rL examples/activation/build activation-kernel \ No newline at end of file From 5847050452330837262a4ce2d7ee41a410169b2b Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 14:57:16 +0200 Subject: [PATCH 20/38] feat(gha): ... 
--- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index bebce292..d48dfccc 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -53,7 +53,7 @@ jobs: cache: 'pip' - name: Install PyTorch - run: pip install --index-url https://download.pytorch.org/whl/cu129 + run: pip install torch --index-url https://download.pytorch.org/whl/cu129 - name: Build activation kernel run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) From a23ce205e4285630b2ea57d31ef258d1c256985d Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 15:44:11 +0200 Subject: [PATCH 21/38] misc(fmt): rustfmt --- build2cmake/src/torch/cuda.rs | 7 ++++++- build2cmake/src/torch/xpu.rs | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/build2cmake/src/torch/cuda.rs b/build2cmake/src/torch/cuda.rs index 206e75ab..74326441 100644 --- a/build2cmake/src/torch/cuda.rs +++ b/build2cmake/src/torch/cuda.rs @@ -351,7 +351,12 @@ pub fn render_kernel( Ok(()) } -pub fn render_extension(env: &Environment, name: &str, ops_name: &str, write: &mut impl Write) -> Result<()> { +pub fn render_extension( + env: &Environment, + name: &str, + ops_name: &str, + write: &mut impl Write, +) -> Result<()> { env.get_template("cuda/torch-extension.cmake") .wrap_err("Cannot get Torch extension template")? 
.render_to_write( diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs index 1da3ef2e..e7c44672 100644 --- a/build2cmake/src/torch/xpu.rs +++ b/build2cmake/src/torch/xpu.rs @@ -250,7 +250,12 @@ pub fn render_kernel( Ok(()) } -pub fn render_extension(env: &Environment, name: &str, ops_name: &str, write: &mut impl Write) -> Result<()> { +pub fn render_extension( + env: &Environment, + name: &str, + ops_name: &str, + write: &mut impl Write, +) -> Result<()> { env.get_template("xpu/torch-extension.cmake") .wrap_err("Cannot get Torch extension template")? .render_to_write( From 515edac5b43da8bd928cffce552918bca8ffbb19 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 15:53:12 +0200 Subject: [PATCH 22/38] feat(gha): update key for caching --- .github/workflows/build_kernel_windows.yaml.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index d48dfccc..612a90a7 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -20,7 +20,7 @@ jobs: steps: - uses: actions/cache@v4 with: - key: cuda-toolkit-${{ matrix.version }}-${{ matrix.os }} + key: cuda-toolkit-129-${{ matrix.os }} path: | C:\Program Files\NVIDIA GPU Computing Toolkit ~/.cargo/registry From bd806779017fea71143e80c321097c11071ee160 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Mon, 13 Oct 2025 16:43:28 +0200 Subject: [PATCH 23/38] feat(xpu): use cmake Intel Compiler version to generate build name --- build2cmake/src/templates/xpu/preamble.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake index c8f56976..2f56442d 100644 --- a/build2cmake/src/templates/xpu/preamble.cmake +++ b/build2cmake/src/templates/xpu/preamble.cmake @@ -62,7 +62,6 @@ message(STATUS "Configuring for Intel XPU 
backend using SYCL") # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") -run_python(XPU_VERSION "import torch; print(torch.xpu.get_device_capability()['version'].split('+')[0])" "Failed to get XPU version") cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(SYSTEM_STRING "${HOST_ARCH}-linux") @@ -72,4 +71,4 @@ else() set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") endif() -generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "xpu" "${XPU_VERSION}" "${SYSTEM_STRING}") +generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "xpu" "${DPCPP_VERSION}" "${SYSTEM_STRING}") From 7493b6eb4b58345a461c8aa81bf4b39ef0885209 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 09:52:44 +0200 Subject: [PATCH 24/38] feat(gha): enable some more tests for windows --- .../workflows/build_kernel_windows.yaml.yml | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 612a90a7..4a09025c 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -58,4 +58,26 @@ jobs: - name: Build activation kernel run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy activation kernel -# run: cp -rL examples/activation/build activation-kernel \ No newline at end of file +# run: cp -rL examples/activation/build activation-kernel + + - name: Build cutlass GEMM kernel + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force 
) +# - name: Copy cutlass GEMM kernel +# run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel + + - name: Build relu kernel + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/relu -BuildConfig Release -Backend cuda -Build -Force ) +# - name: Copy relu kernel +# run: cp -rL examples/relu/result relu-kernel + + - name: Build relu-backprop-compile kernel + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/relu-backprop-compile -BuildConfig Release -Backend cuda -Build -Force ) +# - name: Copy relu-backprop-compile kernel +# run: cp -rL examples/relu-backprop-compile/result relu-backprop-compile-kernel + + # Just test that we build with the extra torchVersions argument. +# - name: Build relu kernel (specific Torch version) +# run: ( cd examples/relu-specific-torch && nix build . ) + + - name: Build silu-and-mul-universal kernel + run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/silu-and-mul-universal -BuildConfig Release -Backend cuda -Build -Force) \ No newline at end of file From dbd89976a84c07d95065480d869f7cdf0b33bcac Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 11:58:25 +0200 Subject: [PATCH 25/38] feat(gha): disable universal kernels, need investigation --- .github/workflows/build_kernel_windows.yaml.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml.yml index 4a09025c..2bfb51f6 100644 --- a/.github/workflows/build_kernel_windows.yaml.yml +++ b/.github/workflows/build_kernel_windows.yaml.yml @@ -79,5 +79,5 @@ jobs: # - name: Build relu kernel (specific Torch version) # run: ( cd examples/relu-specific-torch && nix build . 
) - - name: Build silu-and-mul-universal kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/silu-and-mul-universal -BuildConfig Release -Backend cuda -Build -Force) \ No newline at end of file +# - name: Build silu-and-mul-universal kernel +# run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/silu-and-mul-universal -BuildConfig Release -Backend cuda -Build -Force) \ No newline at end of file From 26bb852eac6eeac1da4496e9ca1d55c2653ca5ad Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 14:06:26 +0200 Subject: [PATCH 26/38] misc(gha): remove double yml extension --- .../{build_kernel_windows.yaml.yml => build_kernel_windows.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{build_kernel_windows.yaml.yml => build_kernel_windows.yaml} (100%) diff --git a/.github/workflows/build_kernel_windows.yaml.yml b/.github/workflows/build_kernel_windows.yaml similarity index 100% rename from .github/workflows/build_kernel_windows.yaml.yml rename to .github/workflows/build_kernel_windows.yaml From 9753e4969a0e86e4ce120b39c1a0cfb9b19ceecf Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 15:21:44 +0200 Subject: [PATCH 27/38] misc(builder): refactored some duplicated fragments --- .../windows/builder.ps1 | 248 ++++++++---------- 1 file changed, 103 insertions(+), 145 deletions(-) rename build-driver/kbuilder.ps1 => scripts/windows/builder.ps1 (67%) diff --git a/build-driver/kbuilder.ps1 b/scripts/windows/builder.ps1 similarity index 67% rename from build-driver/kbuilder.ps1 rename to scripts/windows/builder.ps1 index 7f0fc0bb..1dd9048d 100644 --- a/build-driver/kbuilder.ps1 +++ b/scripts/windows/builder.ps1 @@ -58,31 +58,31 @@ Installation prefix for kernels_install target (defaults to CMAKE_INSTALL_PREFIX) .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu + .\builder.ps1 -SourceFolder ./examples/relu .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Force + 
.\builder.ps1 -SourceFolder ./examples/relu -Backend cuda -Force .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -TargetFolder ./build/relu -OpsId abc123 + .\builder.ps1 -SourceFolder ./examples/relu -TargetFolder ./build/relu -OpsId abc123 .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Clean -Force + .\builder.ps1 -SourceFolder ./examples/relu -Clean -Force .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -BuildConfig Debug + .\builder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -BuildConfig Debug .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -ArchList "7.5 8.6" -Build + .\builder.ps1 -SourceFolder ./examples/relu -Backend cuda -ArchList "7.5 8.6" -Build .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend rocm -ArchList "gfx906;gfx908" -Build + .\builder.ps1 -SourceFolder ./examples/relu -Backend rocm -ArchList "gfx906;gfx908" -Build .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -LocalInstall + .\builder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -LocalInstall .EXAMPLE - .\kbuilder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -KernelsInstall -InstallPrefix "C:\kernels" + .\builder.ps1 -SourceFolder ./examples/relu -Backend cuda -Build -KernelsInstall -InstallPrefix "C:\kernels" #> [CmdletBinding(DefaultParameterSetName = 'Generate')] @@ -239,6 +239,20 @@ function Invoke-Build2Cmake { } } +function Import-EnvironmentVariables { + <# + .SYNOPSIS + Imports environment variables from a file + #> + param([string]$FilePath) + + Get-Content $FilePath | ForEach-Object { + if ($_ -match '^([^=]+)=(.*)$') { + Set-Item -Path "env:$($matches[1])" -Value $matches[2] + } + } +} + function Initialize-VSEnvironment { <# .SYNOPSIS @@ -285,19 +299,54 @@ function Initialize-VSEnvironment { } # Parse and apply environment variables - Get-Content $tempFile | ForEach-Object { - if ($_ -match '^([^=]+)=(.*)$') { - $name = $matches[1] - $value 
= $matches[2] - Set-Item -Path "env:\$name" -Value $value - } - } + Import-EnvironmentVariables -FilePath $tempFile Remove-Item $tempFile -ErrorAction SilentlyContinue Write-Status "Visual Studio environment initialized successfully" -Type Success } +function Get-CMakeConfigureArgs { + <# + .SYNOPSIS + Builds CMake configuration arguments + #> + param( + [bool]$ShouldInstall, + [string]$InstallPrefix + ) + + $args = @("..", "-G", "Visual Studio 17 2022", "-A", "x64") + + if ($ShouldInstall -and $InstallPrefix) { + $args += "-DCMAKE_INSTALL_PREFIX=$InstallPrefix" + Write-Status "Setting CMAKE_INSTALL_PREFIX=$InstallPrefix" -Type Info + } + + return $args +} + +function Invoke-CMakeTarget { + <# + .SYNOPSIS + Executes a CMake build target + #> + param( + [string]$Target, + [string]$BuildConfig, + [string]$DisplayName + ) + + Write-Status "Running $DisplayName..." -Type Info + cmake --build . --target $Target --config $BuildConfig + + if ($LASTEXITCODE -ne 0) { + throw "$DisplayName failed with exit code $LASTEXITCODE" + } + + Write-Status "$DisplayName completed successfully!" -Type Success +} + function Invoke-CMakeBuild { param( [string]$SourcePath, @@ -324,13 +373,7 @@ function Invoke-CMakeBuild { Write-Status "Configuring CMake project..." -Type Info Push-Location $buildDir try { - $configureArgs = @("..", "-G", "Visual Studio 17 2022", "-A", "x64") - - # Add install prefix if specified for kernels_install - if (($RunKernelsInstall -or $LocalInstall) -and $InstallPrefix) { - $configureArgs += "-DCMAKE_INSTALL_PREFIX=$InstallPrefix" - Write-Status "Setting CMAKE_INSTALL_PREFIX=$InstallPrefix" -Type Info - } + $configureArgs = Get-CMakeConfigureArgs -ShouldInstall ($RunKernelsInstall -or $RunLocalInstall) -InstallPrefix $InstallPrefix cmake @configureArgs @@ -348,28 +391,13 @@ function Invoke-CMakeBuild { Write-Status "Build completed successfully!" 
-Type Success - # Run install target if requested (installs to local development layout) + # Run install targets if requested if ($RunLocalInstall) { - Write-Status "Running install target (local development layout)..." -Type Info - cmake --build . --target local_install --config $BuildConfig - - if ($LASTEXITCODE -ne 0) { - throw "Install target failed with exit code $LASTEXITCODE" - } - - Write-Status "Local install completed successfully!" -Type Success + Invoke-CMakeTarget -Target 'local_install' -BuildConfig $BuildConfig -DisplayName 'install target (local development layout)' } - # Run kernels_install target if requested if ($RunKernelsInstall) { - Write-Status "Running kernels_install target..." -Type Info - cmake --build . --config $BuildConfig --target kernels_install - - if ($LASTEXITCODE -ne 0) { - throw "kernels_install target failed with exit code $LASTEXITCODE" - } - - Write-Status "Kernels install completed successfully!" -Type Success + Invoke-CMakeTarget -Target 'kernels_install' -BuildConfig $BuildConfig -DisplayName 'kernels_install target' } } finally { @@ -381,103 +409,58 @@ function Invoke-CMakeBuild { #region Backend-Specific Functions -function Invoke-CudaBackend { - param( - [string]$Build2CmakeExe, - [string]$BuildToml, - [string]$Target, - [hashtable]$Options - ) - - Write-Status "Generating CUDA backend..." -Type Info - - $args = @('generate-torch', $BuildToml) - - if ($Target) { $args += $Target } - if ($Options.Force) { $args += '--force' } - if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } - $args += '--backend', 'cuda' - - Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args -} - -function Invoke-RocmBackend { - param( - [string]$Build2CmakeExe, - [string]$BuildToml, - [string]$Target, - [hashtable]$Options - ) - - Write-Status "Generating ROCm backend..." 
-Type Info - - $args = @('generate-torch', $BuildToml) - - if ($Target) { $args += $Target } - if ($Options.Force) { $args += '--force' } - if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } - $args += '--backend', 'rocm' - - Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args -} - -function Invoke-MetalBackend { - param( - [string]$Build2CmakeExe, - [string]$BuildToml, - [string]$Target, - [hashtable]$Options - ) - - Write-Status "Generating Metal backend..." -Type Info - - $args = @('generate-torch', $BuildToml) - - if ($Target) { $args += $Target } - if ($Options.Force) { $args += '--force' } - if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } - $args += '--backend', 'metal' - - Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args -} - -function Invoke-XpuBackend { +function Invoke-Backend { + <# + .SYNOPSIS + Generates CMake files for specified backend + #> param( [string]$Build2CmakeExe, [string]$BuildToml, [string]$Target, - [hashtable]$Options + [hashtable]$Options, + [string]$Backend ) - Write-Status "Generating XPU backend..." -Type Info + $backendName = if ($Backend -eq 'universal') { 'Universal' } else { $Backend.ToUpper() } + Write-Status "Generating $backendName backend..." -Type Info $args = @('generate-torch', $BuildToml) if ($Target) { $args += $Target } if ($Options.Force) { $args += '--force' } if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } - $args += '--backend', 'xpu' + if ($Backend -and $Backend -ne 'universal') { $args += '--backend', $Backend } Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args } -function Invoke-UniversalBackend { +function Set-BackendArchitecture { + <# + .SYNOPSIS + Configures backend-specific architecture environment variables + #> param( - [string]$Build2CmakeExe, - [string]$BuildToml, - [string]$Target, - [hashtable]$Options + [string]$Backend, + [string]$ArchList ) - Write-Status "Generating Universal backend..." 
-Type Info - - $args = @('generate-torch', $BuildToml) - - if ($Target) { $args += $Target } - if ($Options.Force) { $args += '--force' } - if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } + $archMappings = @{ + 'cuda' = @{ Env = 'TORCH_CUDA_ARCH_LIST'; Supported = $true } + 'rocm' = @{ Env = 'PYTORCH_ROCM_ARCH'; Supported = $true } + 'xpu' = @{ Env = $null; Supported = $false; Message = 'no standard environment variable' } + } - Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args + if ($mapping = $archMappings[$Backend.ToLower()]) { + if ($mapping.Supported) { + Set-Item "env:$($mapping.Env)" -Value $ArchList + Write-Status "Set $($mapping.Env)=$ArchList" -Type Info + } else { + Write-Status "ArchList not supported for $Backend backend ($($mapping.Message))" -Type Warning + } + } else { + Write-Status "ArchList not applicable for $Backend backend" -Type Warning + } } #endregion @@ -526,39 +509,14 @@ try { # Set architecture environment variables if ArchList is provided if ($ArchList -and $Backend) { - switch ($Backend.ToLower()) { - 'cuda' { - $env:TORCH_CUDA_ARCH_LIST = $ArchList - Write-Status "Set TORCH_CUDA_ARCH_LIST=$ArchList" -Type Info - } - 'rocm' { - $env:PYTORCH_ROCM_ARCH = $ArchList - Write-Status "Set PYTORCH_ROCM_ARCH=$ArchList" -Type Info - } - 'xpu' { - Write-Status "ArchList not supported for XPU backend (no standard environment variable)" -Type Warning - } - 'metal' { - Write-Status "ArchList not applicable for Metal backend" -Type Warning - } - 'universal' { - Write-Status "ArchList not applicable for Universal backend" -Type Warning - } - } + Set-BackendArchitecture -Backend $Backend -ArchList $ArchList } # Determine backend strategy if ($Backend) { # Explicit backend specified $targetPath = if ($TargetFolder) { Resolve-Path $TargetFolder } else { $null } - - switch ($Backend.ToLower()) { - 'cuda' { Invoke-CudaBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } - 
'rocm' { Invoke-RocmBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } - 'metal' { Invoke-MetalBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } - 'xpu' { Invoke-XpuBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } - 'universal' { Invoke-UniversalBackend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options } - } + Invoke-Backend -Build2CmakeExe $build2cmakeExe -BuildToml $buildTomlPath -Target $targetPath -Options $options -Backend $Backend.ToLower() } else { # Auto-detect backend from build.toml Write-Status "Auto-detecting backend from build.toml..." -Type Info From c824402999dabebb7dc8436bbcdab8f7e0edd16f Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 15:23:00 +0200 Subject: [PATCH 28/38] misc(builder): update gha workflow with new paths --- .github/workflows/build_kernel_windows.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 2bfb51f6..56b772b3 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -56,22 +56,22 @@ jobs: run: pip install torch --index-url https://download.pytorch.org/whl/cu129 - name: Build activation kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) + run: ( scripts\windows\builder.ps1 -SourceFolder examples/activation -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy activation kernel # run: cp -rL examples/activation/build activation-kernel - name: Build cutlass GEMM kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) + run: ( scripts\windows\builder.ps1 
-SourceFolder examples/cutlass-gemm -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy cutlass GEMM kernel # run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel - name: Build relu kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/relu -BuildConfig Release -Backend cuda -Build -Force ) + run: ( scripts\windows\builder.ps1 -SourceFolder examples/relu -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy relu kernel # run: cp -rL examples/relu/result relu-kernel - name: Build relu-backprop-compile kernel - run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/relu-backprop-compile -BuildConfig Release -Backend cuda -Build -Force ) + run: ( scripts\windows\builder.ps1 -SourceFolder examples/relu-backprop-compile -BuildConfig Release -Backend cuda -Build -Force ) # - name: Copy relu-backprop-compile kernel # run: cp -rL examples/relu-backprop-compile/result relu-backprop-compile-kernel @@ -79,5 +79,5 @@ jobs: # - name: Build relu kernel (specific Torch version) # run: ( cd examples/relu-specific-torch && nix build . 
) -# - name: Build silu-and-mul-universal kernel -# run: ( .\build-driver\kbuilder.ps1 -SourceFolder examples/silu-and-mul-universal -BuildConfig Release -Backend cuda -Build -Force) \ No newline at end of file + - name: Build silu-and-mul-universal kernel + run: ( scripts\windows\builder.ps1 -SourceFolder examples/silu-and-mul-universal -BuildConfig Release -Build -Force) \ No newline at end of file From b8ef512c6c72d8a68d2dd7b76d2e7815d2939361 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Tue, 14 Oct 2025 18:51:42 +0200 Subject: [PATCH 29/38] feat(gha): update to latest version of cuda-toolkit --- .github/workflows/build_kernel_windows.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index 56b772b3..c62862a0 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -29,7 +29,7 @@ jobs: - uses: actions/checkout@v5 # CUDA environment setup - - uses: N-Storm/cuda-toolkit@v0.2.27m + - uses: N-Storm/cuda-toolkit@v0.2.28 id: setup-cuda-toolkit with: cuda: '12.9.1' # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch? From 07500fe5da02073facfc741900c1375486bffaa7 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 15 Oct 2025 10:22:02 +0200 Subject: [PATCH 30/38] misc(builder): update build2cmake default location --- scripts/windows/builder.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/windows/builder.ps1 b/scripts/windows/builder.ps1 index 1dd9048d..8db43277 100644 --- a/scripts/windows/builder.ps1 +++ b/scripts/windows/builder.ps1 @@ -183,8 +183,8 @@ function Find-Build2Cmake { # Search common locations $searchPaths = @( - (Join-Path $PSScriptRoot '..' 'build2cmake' 'target' 'release' 'build2cmake.exe'), - (Join-Path $PSScriptRoot '..' 'build2cmake' 'target' 'debug' 'build2cmake.exe'), + (Join-Path $PSScriptRoot '..' '..' 
'build2cmake' 'target' 'release' 'build2cmake.exe'), + (Join-Path $PSScriptRoot '..' '..' 'build2cmake' 'target' 'debug' 'build2cmake.exe'), 'build2cmake.exe', 'build2cmake' ) From 57c2e9fbad89fbfeda2e4a442d0002f1a92a4490 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 15 Oct 2025 21:31:49 +0200 Subject: [PATCH 31/38] misc(builder): rename args variable to kwargs to not override builtin args variable --- scripts/windows/builder.ps1 | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scripts/windows/builder.ps1 b/scripts/windows/builder.ps1 index 8db43277..586e15ee 100644 --- a/scripts/windows/builder.ps1 +++ b/scripts/windows/builder.ps1 @@ -316,14 +316,14 @@ function Get-CMakeConfigureArgs { [string]$InstallPrefix ) - $args = @("..", "-G", "Visual Studio 17 2022", "-A", "x64") + $kwargs = @("..", "-G", "Visual Studio 17 2022", "-A", "x64") if ($ShouldInstall -and $InstallPrefix) { - $args += "-DCMAKE_INSTALL_PREFIX=$InstallPrefix" + $kwargs += "-DCMAKE_INSTALL_PREFIX=$InstallPrefix" Write-Status "Setting CMAKE_INSTALL_PREFIX=$InstallPrefix" -Type Info } - return $args + return $kwargs } function Invoke-CMakeTarget { @@ -425,14 +425,14 @@ function Invoke-Backend { $backendName = if ($Backend -eq 'universal') { 'Universal' } else { $Backend.ToUpper() } Write-Status "Generating $backendName backend..." 
-Type Info - $args = @('generate-torch', $BuildToml) + $kwargs = @('generate-torch', $BuildToml) - if ($Target) { $args += $Target } - if ($Options.Force) { $args += '--force' } - if ($Options.OpsId) { $args += '--ops-id', $Options.OpsId } - if ($Backend -and $Backend -ne 'universal') { $args += '--backend', $Backend } + if ($Target) { $kwargs += $Target } + if ($Options.Force) { $kwargs += '--force' } + if ($Options.OpsId) { $kwargs += '--ops-id', $Options.OpsId } + if ($Backend -and $Backend -ne 'universal') { $kwargs += '--backend', $Backend } - Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $args + Invoke-Build2Cmake -Build2CmakeExe $Build2CmakeExe -Arguments $kwargs } function Set-BackendArchitecture { @@ -485,13 +485,13 @@ try { if ($Clean) { Write-Status "Cleaning generated artifacts..." -Type Warning - $args = @('clean', $buildTomlPath) - if ($TargetFolder) { $args += $TargetFolder } - if ($DryRun) { $args += '--dry-run' } - if ($Force) { $args += '--force' } - if ($OpsId) { $args += '--ops-id', $OpsId } + $kwargs = @('clean', $buildTomlPath) + if ($TargetFolder) { $kwargs += $TargetFolder } + if ($DryRun) { $kwargs += '--dry-run' } + if ($Force) { $kwargs += '--force' } + if ($OpsId) { $kwargs += '--ops-id', $OpsId } - Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $args + Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $kwargs Write-Status "Clean completed!" -Type Success exit 0 } @@ -521,12 +521,12 @@ try { # Auto-detect backend from build.toml Write-Status "Auto-detecting backend from build.toml..." 
-Type Info - $args = @('generate-torch', $buildTomlPath) - if ($TargetFolder) { $args += (Resolve-Path $TargetFolder) } - if ($Force) { $args += '--force' } - if ($OpsId) { $args += '--ops-id', $OpsId } + $kwargs = @('generate-torch', $buildTomlPath) + if ($TargetFolder) { $kwargs += (Resolve-Path $TargetFolder) } + if ($Force) { $kwargs += '--force' } + if ($OpsId) { $kwargs += '--ops-id', $OpsId } - Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $args + Invoke-Build2Cmake -Build2CmakeExe $build2cmakeExe -Arguments $kwargs } Write-Status "Generation completed successfully!" -Type Success From 5f6c3b1a39ec3e250d1943a5f69b5187fcf27b56 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 15 Oct 2025 21:49:27 +0200 Subject: [PATCH 32/38] misc(builder): do not attempt to build if there is no CMakeLists.txt --- scripts/windows/builder.ps1 | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scripts/windows/builder.ps1 b/scripts/windows/builder.ps1 index 586e15ee..16ea060a 100644 --- a/scripts/windows/builder.ps1 +++ b/scripts/windows/builder.ps1 @@ -531,15 +531,21 @@ try { Write-Status "Generation completed successfully!" 
-Type Success - # Build if requested + # Build if requested (skip if no CMakeLists.txt exists, e.g., universal backend) if ($Build) { $buildPath = if ($TargetFolder) { $TargetFolder } else { $SourceFolder } - Invoke-CMakeBuild ` - -SourcePath $buildPath ` - -BuildConfig $BuildConfig ` - -RunLocalInstall $LocalInstall.IsPresent ` - -RunKernelsInstall $KernelsInstall.IsPresent ` - -InstallPrefix $InstallPrefix + $cmakeListsPath = Join-Path $buildPath "CMakeLists.txt" + + if (!(Test-Path $cmakeListsPath -PathType Leaf)) { + Write-Status "No CMakeLists.txt found, skipping build (likely universal backend)" -Type Info + } else { + Invoke-CMakeBuild ` + -SourcePath $buildPath ` + -BuildConfig $BuildConfig ` + -RunLocalInstall $LocalInstall.IsPresent ` + -RunKernelsInstall $KernelsInstall.IsPresent ` + -InstallPrefix $InstallPrefix + } } } catch { From 2b9ac6ce28889985124847eb2f4caa343b0408c6 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Wed, 15 Oct 2025 22:00:26 +0200 Subject: [PATCH 33/38] misc(builder): setup for multiple torch and cuda versions --- .github/workflows/build_kernel_windows.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_kernel_windows.yaml b/.github/workflows/build_kernel_windows.yaml index c62862a0..24a36e65 100644 --- a/.github/workflows/build_kernel_windows.yaml +++ b/.github/workflows/build_kernel_windows.yaml @@ -13,6 +13,9 @@ jobs: matrix: os: [ windows-2022 ] python: [ '3.12', '3.13' ] + torch: [ + { version: '2.8', cuda: '12.9.1', wheel: '129' } + ] name: Build kernel runs-on: ${{ matrix.os }} @@ -20,7 +23,7 @@ jobs: steps: - uses: actions/cache@v4 with: - key: cuda-toolkit-129-${{ matrix.os }} + key: cuda-toolkit-v${{ matrix.cuda }}-${{ matrix.os }} path: | C:\Program Files\NVIDIA GPU Computing Toolkit ~/.cargo/registry @@ -32,7 +35,7 @@ jobs: - uses: N-Storm/cuda-toolkit@v0.2.28 id: setup-cuda-toolkit with: - cuda: '12.9.1' # TODO(mfuntowicz): How can we test multiple CUDA versions 
than align with torch? + cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions than align with torch? - name: "NVCC checks" run: nvcc -V From 53b383c2eaa56809e312d5f2afbea117991abdb4 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 16 Oct 2025 15:52:59 +0200 Subject: [PATCH 34/38] misc(builder): move windows only logic to specific windows.cmake and generate necessary code through build2cmake --- build2cmake/src/templates/cuda/preamble.cmake | 18 +- build2cmake/src/templates/utils.cmake | 193 ------------------ build2cmake/src/templates/windows.cmake | 176 ++++++++++++++++ build2cmake/src/torch/cuda.rs | 11 +- 4 files changed, 195 insertions(+), 203 deletions(-) create mode 100644 build2cmake/src/templates/windows.cmake diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index ca806f99..71883242 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -99,19 +99,18 @@ else() "${${GPU_LANG}_SUPPORTED_ARCHS}") endif() + +message(STATUS "Rendered for platform {{ platform }}") +{% if platform == 'windows' %} +include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake) + # Generate standardized build name run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(SYSTEM_STRING "${HOST_ARCH}-linux") -elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set(SYSTEM_STRING "${HOST_ARCH}-darwin") -elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") - set(SYSTEM_STRING "${HOST_ARCH}-windows") -else() - set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") -endif() + +set(SYSTEM_STRING "${HOST_ARCH}-windows") +set(SYSTEM_STRING 
"${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") if(GPU_LANG STREQUAL "CUDA") generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "cuda" "${CUDA_VERSION}" "${SYSTEM_STRING}") @@ -119,3 +118,4 @@ elseif(GPU_LANG STREQUAL "HIP") run_python(ROCM_VERSION "import torch.version; print(torch.version.hip.split('.')[0] + '.' + torch.version.hip.split('.')[1])" "Failed to get ROCm version") generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "rocm" "${ROCM_VERSION}" "${SYSTEM_STRING}") endif() +{% endif %} diff --git a/build2cmake/src/templates/utils.cmake b/build2cmake/src/templates/utils.cmake index 920b5969..d4d4cef2 100644 --- a/build2cmake/src/templates/utils.cmake +++ b/build2cmake/src/templates/utils.cmake @@ -555,196 +555,3 @@ function (define_gpu_extension_target GPU_MOD_NAME) install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION} COMPONENT ${GPU_MOD_NAME}) endfunction() - -# -# Generate a standardized build variant name following the pattern: -# torch--- (or torch-- for aarch64-darwin) -# -# Arguments: -# OUT_BUILD_NAME - Output variable name -# TORCH_VERSION - PyTorch version (e.g., "2.7.1") -# CXX11_ABI - Whether C++11 ABI is enabled (TRUE/FALSE) -# COMPUTE_FRAMEWORK - One of: cuda, rocm, metal, xpu -# COMPUTE_VERSION - Version of compute framework (e.g., "12.4" for CUDA, "6.0" for ROCm) -# SYSTEM - System identifier (e.g., "x86_64-linux", "aarch64-darwin") -# -# Example output: torch271-cxx11-cu124-x86_64-linux -# -function(generate_build_name OUT_BUILD_NAME TORCH_VERSION CXX11_ABI COMPUTE_FRAMEWORK COMPUTE_VERSION SYSTEM) - # Flatten version by removing dots and padding to 2 components - string(REPLACE "." 
";" VERSION_LIST "${TORCH_VERSION}") - list(LENGTH VERSION_LIST VERSION_COMPONENTS) - - # Pad to at least 2 components - if(VERSION_COMPONENTS LESS 2) - list(APPEND VERSION_LIST "0") - endif() - - # Take first 2 components and join without dots - list(GET VERSION_LIST 0 MAJOR) - list(GET VERSION_LIST 1 MINOR) - set(FLATTENED_TORCH "${MAJOR}${MINOR}") - - # Determine ABI string (skip for aarch64-darwin) - if(SYSTEM STREQUAL "aarch64-darwin") - set(ABI_STRING "") - else() - if(CXX11_ABI) - set(ABI_STRING "cxx11") - else() - set(ABI_STRING "cxx98") - endif() - endif() - - # Generate compute string - if(COMPUTE_FRAMEWORK STREQUAL "cuda") - # Flatten CUDA version (e.g., "12.4" -> "124") - string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") - list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) - if(COMPUTE_COMPONENTS GREATER_EQUAL 2) - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) - set(COMPUTE_STRING "cu${COMPUTE_MAJOR}${COMPUTE_MINOR}") - else() - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - set(COMPUTE_STRING "cu${COMPUTE_MAJOR}0") - endif() - elseif(COMPUTE_FRAMEWORK STREQUAL "rocm") - # Flatten ROCm version (e.g., "6.0" -> "60") - string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") - list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) - if(COMPUTE_COMPONENTS GREATER_EQUAL 2) - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) - set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}${COMPUTE_MINOR}") - else() - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}0") - endif() - elseif(COMPUTE_FRAMEWORK STREQUAL "metal") - set(COMPUTE_STRING "metal") - elseif(COMPUTE_FRAMEWORK STREQUAL "xpu") - # Flatten XPU version (e.g., "2025.2" -> "202552") - string(REPLACE "." 
";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") - list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) - if(COMPUTE_COMPONENTS GREATER_EQUAL 2) - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) - set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}${COMPUTE_MINOR}") - else() - list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) - set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}0") - endif() - else() - message(FATAL_ERROR "Unknown compute framework: ${COMPUTE_FRAMEWORK}") - endif() - - # Assemble the final build name - if(ABI_STRING STREQUAL "") - set(BUILD_NAME "torch${FLATTENED_TORCH}-${COMPUTE_STRING}-${SYSTEM}") - else() - set(BUILD_NAME "torch${FLATTENED_TORCH}-${ABI_STRING}-${COMPUTE_STRING}-${SYSTEM}") - endif() - - set(${OUT_BUILD_NAME} "${BUILD_NAME}" PARENT_SCOPE) - message(STATUS "Generated build name: ${BUILD_NAME}") -endfunction() - -# -# Create a custom install target for the huggingface/kernels library layout. -# This installs the extension into a directory structure suitable for kernel hub discovery: -# /// -# -# Arguments: -# TARGET_NAME - Name of the target to create the install rule for -# PACKAGE_NAME - Python package name (e.g., "activation") -# BUILD_VARIANT_NAME - Build variant name (e.g., "torch271-cxx11-cu124-x86_64-linux") -# INSTALL_PREFIX - Base installation directory (defaults to CMAKE_INSTALL_PREFIX) -# -function(add_kernels_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) - set(oneValueArgs INSTALL_PREFIX) - cmake_parse_arguments(ARG "" "${oneValueArgs}" "" ${ARGN}) - - if(NOT ARG_INSTALL_PREFIX) - set(ARG_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") - endif() - - # Create the kernels_install target if it doesn't exist - if(NOT TARGET kernels_install) - add_custom_target(kernels_install ALL - COMMENT "Installing all kernels to hub-compatible layout" - VERBATIM) - endif() - - # Create a custom target for this specific kernel - set(KERNEL_INSTALL_TARGET "${TARGET_NAME}_kernel_install") - set(KERNEL_INSTALL_DIR 
"${ARG_INSTALL_PREFIX}/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") - - add_custom_target(${KERNEL_INSTALL_TARGET} ALL - COMMAND ${CMAKE_COMMAND} -E make_directory "${KERNEL_INSTALL_DIR}" - COMMAND ${CMAKE_COMMAND} -E copy $ "${KERNEL_INSTALL_DIR}/" - COMMAND ${CMAKE_COMMAND} -E copy_directory - "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}" - "${KERNEL_INSTALL_DIR}/" - DEPENDS ${TARGET_NAME} - COMMENT "Installing ${TARGET_NAME} to ${KERNEL_INSTALL_DIR}" - VERBATIM) - - # Make kernels_install depend on this specific kernel's install - add_dependencies(kernels_install ${KERNEL_INSTALL_TARGET}) - - # Set folder for IDE organization - if(MSVC OR XCODE) - set_target_properties(${KERNEL_INSTALL_TARGET} PROPERTIES FOLDER "Install") - endif() - - message(STATUS "Added kernels_install target for ${TARGET_NAME} -> ${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") -endfunction() - -# -# Add install rules for local development with huggingface/kernels. -# This installs the extension into the layout expected by get_local_kernel(): -# ${CMAKE_SOURCE_DIR}/build/// -# -# This allows developers to use get_local_kernel() from the kernels library to load -# locally built kernels without needing to publish to the hub. -# -# This uses the standard CMake install() command, so it works with the default -# "install" target that is always available. 
-# -# Arguments: -# TARGET_NAME - Name of the target to create the install rule for -# PACKAGE_NAME - Python package name (e.g., "activation") -# BUILD_VARIANT_NAME - Build variant name (e.g., "torch271-cxx11-cu124-x86_64-linux") -# -function(add_local_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) - # Define your local, folder based, installation directory - set(LOCAL_INSTALL_DIR "${CMAKE_SOURCE_DIR}/build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") - - # Glob Python files at configure time - file(GLOB PYTHON_FILES "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}/*.py") - - # Create a custom target for local installation - add_custom_target(local_install - COMMENT "Installing files to local directory..." - ) - - # Add custom commands to copy files - add_custom_command(TARGET local_install POST_BUILD - # Copy the shared library - COMMAND ${CMAKE_COMMAND} -E copy_if_different - $ - ${LOCAL_INSTALL_DIR}/ - - # Copy each Python file - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${PYTHON_FILES} - ${LOCAL_INSTALL_DIR}/ - - COMMENT "Copying shared library and Python files to ${LOCAL_INSTALL_DIR}" - COMMAND_EXPAND_LISTS - ) - - file(MAKE_DIRECTORY ${LOCAL_INSTALL_DIR}) - message(STATUS "Added install rules for ${TARGET_NAME} -> build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") -endfunction() diff --git a/build2cmake/src/templates/windows.cmake b/build2cmake/src/templates/windows.cmake new file mode 100644 index 00000000..ec313622 --- /dev/null +++ b/build2cmake/src/templates/windows.cmake @@ -0,0 +1,176 @@ +# Generate a standardized build variant name following the pattern: +# torch---windows +# +# Arguments: +# OUT_BUILD_NAME - Output variable name +# TORCH_VERSION - PyTorch version (e.g., "2.7.1") +# CXX11_ABI - Whether C++11 ABI is enabled (TRUE/FALSE) +# COMPUTE_FRAMEWORK - One of: cuda, rocm, metal, xpu +# COMPUTE_VERSION - Version of compute framework (e.g., "12.4" for CUDA, "6.0" for ROCm) +# Example output: torch271-cxx11-cu124-x86_64-windows +# 
+function(generate_build_name OUT_BUILD_NAME TORCH_VERSION CXX11_ABI COMPUTE_FRAMEWORK COMPUTE_VERSION) + # Flatten version by removing dots and padding to 2 components + string(REPLACE "." ";" VERSION_LIST "${TORCH_VERSION}") + list(LENGTH VERSION_LIST VERSION_COMPONENTS) + + # Pad to at least 2 components + if(VERSION_COMPONENTS LESS 2) + list(APPEND VERSION_LIST "0") + endif() + + # Take first 2 components and join without dots + list(GET VERSION_LIST 0 MAJOR) + list(GET VERSION_LIST 1 MINOR) + set(FLATTENED_TORCH "${MAJOR}${MINOR}") + + # Generate compute string + if(COMPUTE_FRAMEWORK STREQUAL "cuda") + # Flatten CUDA version (e.g., "12.4" -> "124") + string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "cu${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "cu${COMPUTE_MAJOR}0") + endif() + elseif(COMPUTE_FRAMEWORK STREQUAL "rocm") + # Flatten ROCm version (e.g., "6.0" -> "60") + string(REPLACE "." ";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "rocm${COMPUTE_MAJOR}0") + endif() + elseif(COMPUTE_FRAMEWORK STREQUAL "xpu") + # Flatten XPU version (e.g., "2025.2" -> "202552") + string(REPLACE "." 
";" COMPUTE_VERSION_LIST "${COMPUTE_VERSION}") + list(LENGTH COMPUTE_VERSION_LIST COMPUTE_COMPONENTS) + if(COMPUTE_COMPONENTS GREATER_EQUAL 2) + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + list(GET COMPUTE_VERSION_LIST 1 COMPUTE_MINOR) + set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}${COMPUTE_MINOR}") + else() + list(GET COMPUTE_VERSION_LIST 0 COMPUTE_MAJOR) + set(COMPUTE_STRING "xpu${COMPUTE_MAJOR}0") + endif() + else() + message(FATAL_ERROR "Unknown compute framework: ${COMPUTE_FRAMEWORK}") + endif() + + # Assemble the final build name + if(ABI_STRING STREQUAL "") + set(BUILD_NAME "torch${FLATTENED_TORCH}-${COMPUTE_STRING}-windows") + else() + set(BUILD_NAME "torch${FLATTENED_TORCH}-${ABI_STRING}-${COMPUTE_STRING}-windows") + endif() + + set(${OUT_BUILD_NAME} "${BUILD_NAME}" PARENT_SCOPE) + message(STATUS "Generated build name: ${BUILD_NAME}") +endfunction() + +# +# Create a custom install target for the huggingface/kernels library layout. +# This installs the extension into a directory structure suitable for kernel hub discovery: +# /// +# +# Arguments: +# TARGET_NAME - Name of the target to create the install rule for +# PACKAGE_NAME - Python package name (e.g., "activation") +# BUILD_VARIANT_NAME - Build variant name (e.g., "torch271-cxx11-cu124-x86_64-linux") +# INSTALL_PREFIX - Base installation directory (defaults to CMAKE_INSTALL_PREFIX) +# +function(add_kernels_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) + set(oneValueArgs INSTALL_PREFIX) + cmake_parse_arguments(ARG "" "${oneValueArgs}" "" ${ARGN}) + + if(NOT ARG_INSTALL_PREFIX) + set(ARG_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + endif() + + # Create the kernels_install target if it doesn't exist + if(NOT TARGET kernels_install) + add_custom_target(kernels_install ALL + COMMENT "Installing all kernels to hub-compatible layout" + VERBATIM) + endif() + + # Create a custom target for this specific kernel + set(KERNEL_INSTALL_TARGET "${TARGET_NAME}_kernel_install") + set(KERNEL_INSTALL_DIR 
"${ARG_INSTALL_PREFIX}/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") + + add_custom_target(${KERNEL_INSTALL_TARGET} ALL + COMMAND ${CMAKE_COMMAND} -E make_directory "${KERNEL_INSTALL_DIR}" + COMMAND ${CMAKE_COMMAND} -E copy $ "${KERNEL_INSTALL_DIR}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}" + "${KERNEL_INSTALL_DIR}/" + DEPENDS ${TARGET_NAME} + COMMENT "Installing ${TARGET_NAME} to ${KERNEL_INSTALL_DIR}" + VERBATIM) + + # Make kernels_install depend on this specific kernel's install + add_dependencies(kernels_install ${KERNEL_INSTALL_TARGET}) + + # Set folder for IDE organization + if(MSVC OR XCODE) + set_target_properties(${KERNEL_INSTALL_TARGET} PROPERTIES FOLDER "Install") + endif() + + message(STATUS "Added kernels_install target for ${TARGET_NAME} -> ${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") +endfunction() + +# +# Add install rules for local development with huggingface/kernels. +# This installs the extension into the layout expected by get_local_kernel(): +# ${CMAKE_SOURCE_DIR}/build/// +# +# This allows developers to use get_local_kernel() from the kernels library to load +# locally built kernels without needing to publish to the hub. +# +# This uses the standard CMake install() command, so it works with the default +# "install" target that is always available. 
+# +# Arguments: +# TARGET_NAME - Name of the target to create the install rule for +# PACKAGE_NAME - Python package name (e.g., "activation") +# BUILD_VARIANT_NAME - Build variant name (e.g., "torch271-cxx11-cu124-x86_64-linux") +# +function(add_local_install_target TARGET_NAME PACKAGE_NAME BUILD_VARIANT_NAME) + # Define your local, folder based, installation directory + set(LOCAL_INSTALL_DIR "${CMAKE_SOURCE_DIR}/build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") + + # Glob Python files at configure time + file(GLOB PYTHON_FILES "${CMAKE_SOURCE_DIR}/torch-ext/${PACKAGE_NAME}/*.py") + + # Create a custom target for local installation + add_custom_target(local_install + COMMENT "Installing files to local directory..." + ) + + # Add custom commands to copy files + add_custom_command(TARGET local_install POST_BUILD + # Copy the shared library + COMMAND ${CMAKE_COMMAND} -E copy_if_different + $ + ${LOCAL_INSTALL_DIR}/ + + # Copy each Python file + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${PYTHON_FILES} + ${LOCAL_INSTALL_DIR}/ + + COMMENT "Copying shared library and Python files to ${LOCAL_INSTALL_DIR}" + COMMAND_EXPAND_LISTS + ) + + file(MAKE_DIRECTORY ${LOCAL_INSTALL_DIR}) + message(STATUS "Added install rules for ${TARGET_NAME} -> build/${BUILD_VARIANT_NAME}/${PACKAGE_NAME}") +endfunction() \ No newline at end of file diff --git a/build2cmake/src/torch/cuda.rs b/build2cmake/src/torch/cuda.rs index 74326441..08d4fb51 100644 --- a/build2cmake/src/torch/cuda.rs +++ b/build2cmake/src/torch/cuda.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::env; use std::io::Write; use std::path::PathBuf; @@ -12,6 +13,7 @@ use crate::version::Version; use crate::FileSet; static CMAKE_UTILS: &str = include_str!("../templates/utils.cmake"); +static WINDOWS_UTILS: &str = include_str!("../templates/windows.cmake"); static REGISTRATION_H: &str = include_str!("../templates/registration.h"); static HIPIFY: &str = include_str!("../templates/cuda/hipify.py"); static 
CUDA_SUPPORTED_ARCHS_JSON: &str = include_str!("../cuda_supported_archs.json"); @@ -155,6 +157,13 @@ fn write_cmake( .entry(utils_path.clone()) .extend_from_slice(CMAKE_UTILS.as_bytes()); + let mut windows_utils_path = PathBuf::new(); + windows_utils_path.push("cmake"); + windows_utils_path.push("windows.cmake"); + file_set + .entry(windows_utils_path.clone()) + .extend_from_slice(WINDOWS_UTILS.as_bytes()); + let mut hipify_path = PathBuf::new(); hipify_path.push("cmake"); hipify_path.push("hipify.py"); @@ -388,7 +397,7 @@ pub fn render_preamble( cuda_minver => cuda_minver.map(|v| v.to_string()), cuda_maxver => cuda_maxver.map(|v| v.to_string()), cuda_supported_archs => cuda_supported_archs(), - + platform => env::consts::OS }, &mut *write, ) From 76b0ba676ae4fa4ebbfa5b40b006ed79751ca88b Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 16 Oct 2025 15:55:55 +0200 Subject: [PATCH 35/38] misc(builder): remove remaining SYSTEM_STRING override --- build2cmake/src/templates/cuda/preamble.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake index 71883242..077ce09b 100644 --- a/build2cmake/src/templates/cuda/preamble.cmake +++ b/build2cmake/src/templates/cuda/preamble.cmake @@ -110,7 +110,6 @@ run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_ cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) set(SYSTEM_STRING "${HOST_ARCH}-windows") -set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") if(GPU_LANG STREQUAL "CUDA") generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "cuda" "${CUDA_VERSION}" "${SYSTEM_STRING}") From 482422f4a50439e65a8c2b2fb3697d7edad11d98 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 16 Oct 2025 16:02:38 +0200 Subject: [PATCH 36/38] misc(builder): remove unneeded changes for unsupported platforms --- build2cmake/src/templates/metal/preamble.cmake | 13 ------------- 
.../src/templates/metal/torch-extension.cmake | 6 ------ build2cmake/src/templates/xpu/preamble.cmake | 14 -------------- .../src/templates/xpu/torch-extension.cmake | 8 +------- 4 files changed, 1 insertion(+), 40 deletions(-) diff --git a/build2cmake/src/templates/metal/preamble.cmake b/build2cmake/src/templates/metal/preamble.cmake index 543a9970..c5cf4256 100644 --- a/build2cmake/src/templates/metal/preamble.cmake +++ b/build2cmake/src/templates/metal/preamble.cmake @@ -29,16 +29,3 @@ add_compile_definitions(METAL_KERNEL) # Initialize list for Metal shader sources set(ALL_METAL_SOURCES) - -# Generate standardized build name -run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") -run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") -cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) -if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set(SYSTEM_STRING "${HOST_ARCH}-darwin") -else() - message(FATAL_ERROR "Metal is only supported on macOS/Darwin") -endif() - -# Metal doesn't have a version - it's tied to the OS -generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "metal" "0" "${SYSTEM_STRING}") diff --git a/build2cmake/src/templates/metal/torch-extension.cmake b/build2cmake/src/templates/metal/torch-extension.cmake index e67e80fc..7995053d 100644 --- a/build2cmake/src/templates/metal/torch-extension.cmake +++ b/build2cmake/src/templates/metal/torch-extension.cmake @@ -15,9 +15,3 @@ define_gpu_extension_target( if(ALL_METAL_SOURCES) compile_metal_shaders({{ ops_name }} "${ALL_METAL_SOURCES}") endif() - -# Add kernels_install target for huggingface/kernels library layout -add_kernels_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") - -# Add local_install target for local development with get_local_kernel() -add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") 
\ No newline at end of file diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake index 2f56442d..d171d100 100644 --- a/build2cmake/src/templates/xpu/preamble.cmake +++ b/build2cmake/src/templates/xpu/preamble.cmake @@ -58,17 +58,3 @@ add_compile_definitions(USE_XPU) set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';") set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;") message(STATUS "Configuring for Intel XPU backend using SYCL") - -# Generate standardized build name -run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version") -run_python(CXX11_ABI_VALUE "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')" "Failed to get CXX11 ABI") -cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM) -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(SYSTEM_STRING "${HOST_ARCH}-linux") -elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") - set(SYSTEM_STRING "${HOST_ARCH}-windows") -else() - set(SYSTEM_STRING "${HOST_ARCH}-${CMAKE_SYSTEM_NAME}") -endif() - -generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" ${CXX11_ABI_VALUE} "xpu" "${DPCPP_VERSION}" "${SYSTEM_STRING}") diff --git a/build2cmake/src/templates/xpu/torch-extension.cmake b/build2cmake/src/templates/xpu/torch-extension.cmake index 2dd1e1b1..75064049 100644 --- a/build2cmake/src/templates/xpu/torch-extension.cmake +++ b/build2cmake/src/templates/xpu/torch-extension.cmake @@ -10,10 +10,4 @@ define_gpu_extension_target( # Add XPU/SYCL specific linker flags target_link_options({{ ops_name }} PRIVATE ${sycl_link_flags}) -target_link_libraries({{ ops_name }} PRIVATE dnnl) - -# 
Add kernels_install target for huggingface/kernels library layout -add_kernels_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") - -# Add local_install target for local development with get_local_kernel() -add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") +target_link_libraries({{ ops_name }} PRIVATE dnnl) \ No newline at end of file From 579aa929bde61d0de94020f21a1e4de4167208cd Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 16 Oct 2025 16:03:11 +0200 Subject: [PATCH 37/38] misc(builder): conditionally include add_kernels_install_targets only on windows --- build2cmake/src/templates/cuda/torch-extension.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/build2cmake/src/templates/cuda/torch-extension.cmake b/build2cmake/src/templates/cuda/torch-extension.cmake index e2f0ecb1..5b934e10 100644 --- a/build2cmake/src/templates/cuda/torch-extension.cmake +++ b/build2cmake/src/templates/cuda/torch-extension.cmake @@ -13,8 +13,13 @@ if( NOT MSVC) target_link_options({{ ops_name }} PRIVATE -static-libstdc++) endif() +{% if platform == 'windows' %} +# These methods below should be included from preamble.cmake on windows platform. 
+ # Add kernels_install target for huggingface/kernels library layout add_kernels_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") # Add local_install target for local development with get_local_kernel() add_local_install_target({{ ops_name }} "{{ name }}" "${BUILD_VARIANT_NAME}") + +{% endif %} From b4a4cfe9ee51370ab34cdd841b3012d36e8d1728 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Thu, 16 Oct 2025 16:41:12 +0200 Subject: [PATCH 38/38] misc(builder): make sure unintended changes are not in the diff --- build2cmake/src/templates/metal/torch-extension.cmake | 2 +- build2cmake/src/templates/xpu/torch-extension.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build2cmake/src/templates/metal/torch-extension.cmake b/build2cmake/src/templates/metal/torch-extension.cmake index 7995053d..3f81df03 100644 --- a/build2cmake/src/templates/metal/torch-extension.cmake +++ b/build2cmake/src/templates/metal/torch-extension.cmake @@ -14,4 +14,4 @@ define_gpu_extension_target( # Compile Metal shaders if any were found if(ALL_METAL_SOURCES) compile_metal_shaders({{ ops_name }} "${ALL_METAL_SOURCES}") -endif() +endif() \ No newline at end of file diff --git a/build2cmake/src/templates/xpu/torch-extension.cmake b/build2cmake/src/templates/xpu/torch-extension.cmake index 75064049..d2a95e47 100644 --- a/build2cmake/src/templates/xpu/torch-extension.cmake +++ b/build2cmake/src/templates/xpu/torch-extension.cmake @@ -10,4 +10,4 @@ define_gpu_extension_target( # Add XPU/SYCL specific linker flags target_link_options({{ ops_name }} PRIVATE ${sycl_link_flags}) -target_link_libraries({{ ops_name }} PRIVATE dnnl) \ No newline at end of file +target_link_libraries({{ ops_name }} PRIVATE dnnl)