Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ compile_commands.json
# Built nanobind extensions
python/_task_interface*.so
python/_task_interface*.dylib

# Log files
*.log
profiling_logs_*/
6 changes: 6 additions & 0 deletions python/simpler/runtime_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ def _build(actual_build_dir: str) -> Union[bytes, Path]:
od.mkdir(parents=True, exist_ok=True)
dest = od / binary_name
shutil.copy2(binary_path, dest)
dispatcher_so = Path(actual_build_dir) / "libaicpu_dispatcher.so"
if dispatcher_so.is_file():
dest_dispatcher = od / "libaicpu_dispatcher.so"
shutil.copy2(dispatcher_so, dest_dispatcher)
# Strip all symbols and debug info (strip -s) to match CANN built-in SO format
subprocess.run(["strip", "-s", str(dest_dispatcher)], check=True)
return dest
else:
with open(binary_path, "rb") as f:
Expand Down
36 changes: 36 additions & 0 deletions src/a2a3/platform/onboard/aicpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ project(aicpu_kernel LANGUAGES C CXX)
set(CMAKE_CUSTOM_INCLUDE_DIRS "")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/aicpu_dispatcher")
if(DEFINED CUSTOM_INCLUDE_DIRS)
foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS})
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}")
Expand Down Expand Up @@ -75,3 +76,38 @@ target_link_directories(aicpu_kernel
# Output name
set_target_properties(aicpu_kernel PROPERTIES OUTPUT_NAME aicpu_kernel)

# Build the AICPU dispatcher shared library (two-layer architecture).
# The resulting libaicpu_dispatcher.so is emitted into the same build
# directory as the aicpu_kernel binary.
set(AICPU_DISPATCHER_SOURCES
    "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/aicpu_dispatcher/aicpu_dispatcher.cpp"
)
add_library(aicpu_dispatcher SHARED ${AICPU_DISPATCHER_SOURCES})

# Compiler options for the dispatcher (kept in line with the AICPU kernel).
# -fPIC is not listed here: position independence is requested through the
# POSITION_INDEPENDENT_CODE target property below.
target_compile_options(aicpu_dispatcher
    PRIVATE
        -Wall
        -Wextra
        -rdynamic
        -O3
        -g
        $<$<COMPILE_LANGUAGE:CXX>:-std=gnu++17>
)

# Include directories for the dispatcher. Paths are quoted so that a toolkit
# or source location containing spaces is passed as a single argument;
# CMAKE_CUSTOM_INCLUDE_DIRS is a list and is intentionally left unquoted.
target_include_directories(aicpu_dispatcher
    PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}"
        ${CMAKE_CUSTOM_INCLUDE_DIRS}
        "${ASCEND_HOME_PATH}/include"
)

# dlopen/dlsym provider. CMAKE_DL_LIBS expands to the platform's library
# ("dl" on Linux) instead of hard-coding the name.
target_link_libraries(aicpu_dispatcher PRIVATE ${CMAKE_DL_LIBS})

# Match CANN built-in SO properties: SYMBOLIC flag and build-id.
# The LINKER: prefix lets CMake emit the right wrapper (-Wl, for GCC/Clang).
# NOTE: the SO is not stripped at build time (-g is kept above); stripping is
# done by the packaging step in python/simpler/runtime_compiler.py, which runs
# `strip -s` on the copied library.
target_link_options(aicpu_dispatcher
    PRIVATE
        "LINKER:-Bsymbolic,--build-id"
)

set_target_properties(aicpu_dispatcher PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME "aicpu_dispatcher"
)

28 changes: 23 additions & 5 deletions src/a2a3/platform/onboard/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ set(CMAKE_CUSTOM_INCLUDE_DIRS "")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../include")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/task_interface")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/worker")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host")
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/aicpu_dispatcher")
if(DEFINED CUSTOM_INCLUDE_DIRS)
foreach(INC_DIR ${CUSTOM_INCLUDE_DIRS})
list(APPEND CMAKE_CUSTOM_INCLUDE_DIRS "${INC_DIR}")
Expand All @@ -35,10 +37,15 @@ list(APPEND HOST_RUNTIME_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/pto_runtime_c_api.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/platform_compile_info.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/host_regs.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/aicpu_loader.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../../src/host/host_log.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../../src/host/unified_log_host.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../../src/host/performance_collector.cpp"
)
# Sources taken from common/host: the LoadAicpuOp implementation.
list(APPEND HOST_RUNTIME_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../../../../common/host/load_aicpu_op.cpp")
if(DEFINED CUSTOM_SOURCE_DIRS)
foreach(SRC_DIR ${CUSTOM_SOURCE_DIRS})
file(GLOB DIR_SOURCES "${SRC_DIR}/*.cpp" "${SRC_DIR}/*.c")
Expand Down Expand Up @@ -84,11 +91,16 @@ target_include_directories(host_runtime
${ASCEND_HOME_PATH}/${CMAKE_SYSTEM_PROCESSOR}-linux/include/driver
)

target_link_directories(host_runtime
PRIVATE
${ASCEND_HOME_PATH}/lib64
${ASCEND_HOME_PATH}/runtime/lib64
)
# BUILD_WITH_NEW_CANN (default ON) selects the new rtsLaunchCpuKernel launch
# interface (CANN 7.0+). When enabled, host_runtime is compiled with the
# BUILD_WITH_NEW_CANN macro defined and gets the extra RTS header directory
# that interface needs.
option(BUILD_WITH_NEW_CANN "Use new rtsLaunchCpuKernel interface (CANN 7.0+)" ON)
if(BUILD_WITH_NEW_CANN)
    message(STATUS "Building with new CANN rtsLaunchCpuKernel interface")
    target_compile_definitions(host_runtime
        PRIVATE
            BUILD_WITH_NEW_CANN
    )
    # New RTS headers live under pkg_inc in the toolkit (CANN 7.0+).
    target_include_directories(host_runtime
        PRIVATE
            "${ASCEND_HOME_PATH}/pkg_inc/runtime/runtime"
    )
endif()

# Link against CANN runtime libraries
# ascend_hal is dynamically loaded at runtime via dlopen in device_runner
Expand All @@ -100,4 +112,10 @@ target_link_libraries(host_runtime
dl
)

# Library search paths inside the Ascend toolkit used when linking host_runtime.
target_link_directories(host_runtime
    PRIVATE
        "${ASCEND_HOME_PATH}/lib64"
        "${ASCEND_HOME_PATH}/runtime/lib64"
)

# Keep the produced library's base name fixed as "host_runtime".
set_target_properties(host_runtime
    PROPERTIES
        OUTPUT_NAME "host_runtime"
)
71 changes: 71 additions & 0 deletions src/a2a3/platform/onboard/host/aicpu_loader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/
/**
* AICPU Loader Implementation (Legacy Interface)
*
* Provides AICPU kernel launching via the legacy rtAicpuKernelLaunchExWithArgs API.
* Used when BUILD_WITH_NEW_CANN is OFF. When BUILD_WITH_NEW_CANN is ON,
* device_runner uses LoadAicpuOp (src/common/host/load_aicpu_op.h) instead.
*/

#include "aicpu_loader.h"

#include <cstring>

#include "common/unified_log.h"
#include "common/kernel_args.h"

/**
 * Initialize the loader from an in-memory AICPU binary.
 *
 * The legacy launch path performs no pre-loading, so both arguments are
 * ignored; the call only logs which interface is in use.
 *
 * @return Always 0.
 */
int AicpuLoader::init_with_binary(
    const std::vector<uint8_t> & /*aicpu_binary*/, const std::vector<std::string> & /*kernel_names*/
) {
    LOG_INFO("AicpuLoader: Using legacy rtAicpuKernelLaunchExWithArgs interface");
    return 0;
}

/**
 * Initialize the loader from a shared-object path.
 *
 * The legacy launch path performs no pre-loading, so both arguments are
 * ignored; the call only logs which interface is in use.
 *
 * @return Always 0.
 */
int AicpuLoader::init(const std::string & /*so_path*/, const std::vector<std::string> & /*kernel_names*/) {
    LOG_INFO("AicpuLoader: Using legacy rtAicpuKernelLaunchExWithArgs interface");
    return 0;
}

/**
 * Launch an AICPU kernel through the legacy rtAicpuKernelLaunchExWithArgs API.
 *
 * A copy of *k_args, the kernel name and the fixed SO name are packed into
 * one contiguous struct; the runtime is given that struct together with the
 * byte offsets of the kernel-name and SO-name fields inside it.
 *
 * @param stream      Stream handed to the runtime launch call.
 * @param k_args      Kernel arguments, copied by value into the payload. Must not be null.
 * @param kernel_name NUL-terminated kernel name; at most 31 characters are kept. Must not be null.
 * @param aicpu_num   Forwarded unchanged to the runtime launch call
 *                    (NOTE(review): presumably the number of AICPU instances to launch — confirm).
 * @return -1 when k_args or kernel_name is null; otherwise the value returned by
 *         rtAicpuKernelLaunchExWithArgs.
 */
int AicpuLoader::launch(rtStream_t stream, KernelArgs *k_args, const char *kernel_name, int aicpu_num) {
    // Reject null inputs up front: both are dereferenced below.
    if (k_args == nullptr || kernel_name == nullptr) {
        return -1;
    }

    // Legacy interface: rtAicpuKernelLaunchExWithArgs
    struct Args {
        KernelArgs k_args;
        char kernel_name[32];
        const char so_name[32] = {"libaicpu_extend_kernels.so"};
        const char op_name[32] = {""};
    } args;

    args.k_args = *k_args;
    // strncpy zero-pads short names; the explicit terminator covers names of 31+ characters.
    std::strncpy(args.kernel_name, kernel_name, sizeof(args.kernel_name) - 1);
    args.kernel_name[sizeof(args.kernel_name) - 1] = '\0';

    rtAicpuArgsEx_t rt_args;
    std::memset(&rt_args, 0, sizeof(rt_args));
    rt_args.args = &args;
    rt_args.argsSize = sizeof(args);
    // The runtime locates the two names by their offsets within the payload.
    rt_args.kernelNameAddrOffset = offsetof(struct Args, kernel_name);
    rt_args.soNameAddrOffset = offsetof(struct Args, so_name);

    return rtAicpuKernelLaunchExWithArgs(
        rtKernelType_t::KERNEL_TYPE_AICPU_KFC, "AST_DYN_AICPU", aicpu_num, &rt_args, nullptr, stream, 0
    );
}

/**
 * Release loader resources.
 *
 * Intentionally empty for the legacy interface: the init functions in this
 * file load nothing, so there is nothing to tear down.
 */
void AicpuLoader::finalize() {
    // Legacy interface: No-op
}
69 changes: 69 additions & 0 deletions src/a2a3/platform/onboard/host/aicpu_loader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/
/**
* AICPU Loader Abstraction (Legacy Interface)
*
* Provides AICPU kernel launching via the legacy rtAicpuKernelLaunchExWithArgs API.
* Used when BUILD_WITH_NEW_CANN is OFF. When BUILD_WITH_NEW_CANN is ON,
* device_runner uses LoadAicpuOp (src/common/host/load_aicpu_op.h) instead.
*/

#ifndef A2A3_PLATFORM_ONBOARD_HOST_AICPU_LOADER_H_
#define A2A3_PLATFORM_ONBOARD_HOST_AICPU_LOADER_H_

#include <cstdint>
#include <string>
#include <vector>

#include <runtime/rt.h>

// Forward declarations
struct KernelArgs;

/**
 * @brief Launcher for AICPU kernels on the legacy runtime path.
 *
 * Wraps the legacy rtAicpuKernelLaunchExWithArgs API and serves as the
 * fallback when BUILD_WITH_NEW_CANN is OFF. Instances can be neither copied
 * nor moved.
 */
class AicpuLoader {
public:
    AicpuLoader() = default;
    ~AicpuLoader() = default;

    // Copying and moving are both disabled.
    AicpuLoader(const AicpuLoader &) = delete;
    AicpuLoader(AicpuLoader &&) = delete;
    AicpuLoader &operator=(const AicpuLoader &) = delete;
    AicpuLoader &operator=(AicpuLoader &&) = delete;

    /**
     * @brief Initialize from a shared-object path (no-op for the legacy interface).
     */
    int init(const std::string &so_path, const std::vector<std::string> &kernel_names);

    /**
     * @brief Initialize from in-memory binary data (no-op for the legacy interface).
     */
    int init_with_binary(const std::vector<uint8_t> &aicpu_binary, const std::vector<std::string> &kernel_names);

    /**
     * @brief Launch one AICPU kernel via legacy rtAicpuKernelLaunchExWithArgs.
     */
    int launch(rtStream_t stream, KernelArgs *k_args, const char *kernel_name, int aicpu_num);

    /**
     * @brief Release resources (no-op for the legacy interface).
     */
    void finalize();
};

#endif // A2A3_PLATFORM_ONBOARD_HOST_AICPU_LOADER_H_
Loading