Skip to content

Commit 9a22d3a

Browse files
author
zhangyue
committed
build(ascend-custom): pip-buildable custom kernels via pre-built static lib
`BUILD_CUSTOM_KERNEL=ON` under `scikit-build-core` / `pip install` trips a `CANN` `extract_host_stub.py` path-handling bug (`KeyError` on the `/./workspace/...` path produced by `$<TARGET_OBJECTS>` in CMake 4.x). `CMakeLists.txt` already notes the issue and suggests the standalone `src/ascend/custom/build.sh` script as the workaround.

- `src/CMakeLists.txt`: when a pre-built `src/ascend/custom/build/lib/libno_workspace_kernel.a` exists, imports it as a `STATIC IMPORTED` target and links it via `$<TARGET_FILE:…>` + `--whole-archive`. Falls back to the in-tree `ascendc_library()` build when the pre-built library is absent (works for direct `cmake` builds outside `pip`).
- `src/ascend/custom/CMakeLists.txt`: appends the main `src/` directory to the interface target's `INCLUDES` property so the kernel TU can `#include "data_type_enum.h"` — the `ascendc_library()` macro forwards this property into its nested `ExternalProject_Add` sub-build.
- `src/ascend/custom/add_rms_norm/op_kernel/.clang-tidy`: disables all `clang-tidy` checks for device code under this directory (it is compiled by `ccec`, is absent from `compile_commands.json`, and the `__aicore__` macro parses incorrectly without `kernel_operator.h`).
1 parent b6821a5 commit 9a22d3a

3 files changed

Lines changed: 40 additions & 2 deletions

File tree

src/CMakeLists.txt

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,25 @@ if(WITH_ASCEND)
242242

243243
# Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`).
244244
if(BUILD_CUSTOM_KERNEL)
245-
add_subdirectory(ascend/custom)
245+
# `scikit-build-core`'s temp-dir builds trip the `CANN`
246+
# `extract_host_stub.py` path-handling bug (`KeyError` on
247+
# `/./workspace/...` paths in `$<TARGET_OBJECTS>`). Workaround:
248+
# prefer a pre-built static lib produced by the standalone
249+
# `src/ascend/custom/build.sh` script. Fall back to in-tree
250+
# `ascendc_library()` when no pre-built is available (works for
251+
# direct `cmake` builds outside `pip`).
252+
set(_prebuilt_custom_lib
253+
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/build/lib/libno_workspace_kernel.a")
254+
255+
if(EXISTS "${_prebuilt_custom_lib}")
256+
message(STATUS "Custom `AscendC` kernels: using pre-built `${_prebuilt_custom_lib}`.")
257+
add_library(no_workspace_kernel STATIC IMPORTED GLOBAL)
258+
set_target_properties(no_workspace_kernel PROPERTIES
259+
IMPORTED_LOCATION "${_prebuilt_custom_lib}")
260+
else()
261+
message(STATUS "Custom `AscendC` kernels: building in-tree (run `src/ascend/custom/build.sh` first to bypass the `CANN` path bug under `pip`).")
262+
add_subdirectory(ascend/custom)
263+
endif()
246264

247265
# Link the compiled `AscendC` kernel objects into `infiniops` so that
248266
# custom kernel implementations (e.g. `RmsNorm` index 1) can call
@@ -379,9 +397,11 @@ if(GENERATE_PYTHON_BINDINGS)
379397
# The `Operator<..., 1>` template instantiations that call
380398
# `aclrtlaunch_*` live in `ops.cc`, so link here with
381399
# `--whole-archive` to ensure all launch functions are available.
400+
# `$<TARGET_FILE>` works for both real `ascendc_library()` targets and
401+
# `IMPORTED` targets pointing at a pre-built `.a`.
382402
if(BUILD_CUSTOM_KERNEL)
383403
target_link_libraries(ops PRIVATE
384-
-Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive)
404+
-Wl,--whole-archive $<TARGET_FILE:no_workspace_kernel> -Wl,--no-whole-archive)
385405
endif()
386406

387407
set_target_properties(infiniops PROPERTIES INSTALL_RPATH "$ORIGIN")

src/ascend/custom/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,15 @@ ascendc_library(no_workspace_kernel STATIC
6363
${PROJECT_OP_SRC_BASE}/add_rms_norm/op_kernel/add_rms_norm.cpp
6464
)
6565

66+
# The kernel translation units include `"data_type_enum.h"` from the main
67+
# project's `src/` so that launcher and device code share one `DataType`
68+
# enum. `ascendc_library` forwards the interface target's `INCLUDES`
69+
# property to the nested `ExternalProject_Add` (see
70+
# `${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake/legacy_modules/function.cmake`),
71+
# so append the main `src/` dir here.
72+
set_property(TARGET no_workspace_kernel_interface APPEND PROPERTY
73+
INCLUDES ${PROJECT_OP_SRC_BASE}/../..)
74+
6675
# Create the shared library `libascend_kernel.so`.
6776
add_library(${OP_PLUGIN_NAME} SHARED ${OP_SRCS})
6877

src/ascend/custom/add_rms_norm/op_kernel/.clang-tidy

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
# `op_kernel/*.cpp` is `AscendC` device code compiled by `ccec`, not by
3+
# the host toolchain, so it has no entry in `compile_commands.json` and
4+
# `clang-tidy` cannot parse it correctly (the `__aicore__` macro expands
5+
# unexpectedly when `kernel_operator.h` is absent). Disable all checks
6+
# here — the `op_host/` side and the `kernel_custom.h` launcher still
7+
# enforce the full ruleset.
8+
9+
Checks: '-*'

0 commit comments

Comments
 (0)