Skip to content

Commit 65060b3

Browse files
committed
Add: memfd-based SO loading for all runtimes
- Add memfd_loader.h for in-memory SO loading using memfd_create
- Integrate memfd loading into AICPU executors across all runtimes
- Eliminates temporary file pollution in /tmp directory
- Provides consistent loading performance without filesystem overhead
1 parent b598e03 commit 65060b3

6 files changed

Lines changed: 590 additions & 120 deletions

File tree

src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "pto2_dispatch_payload.h"
2929
#include "runtime.h"
3030
#include "spin_hint.h"
31+
#include "memfd_loader.h"
3132

3233
// Runtime headers (full struct definition for create/destroy + PTO2_SCOPE)
3334
#include "pto_runtime2.h"
@@ -241,6 +242,7 @@ struct AicpuExecutor {
241242
// Orchestration SO handle - defer dlclose until all tasks complete
242243
void *orch_so_handle_{nullptr};
243244
char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup
245+
int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based)
244246

245247
// Shared orchestration function pointer (loaded by first orch thread, used by all)
246248
DeviceOrchestrationFunc orch_func_{nullptr};
@@ -1618,50 +1620,71 @@ int32_t AicpuExecutor::run(Runtime *runtime) {
16181620
return -1;
16191621
}
16201622

1621-
// Try multiple paths that may allow execution on AICPU
1623+
// Try memfd first, fall back to file-based
16221624
char so_path[256];
1623-
bool file_created = false;
1624-
const char *candidate_dirs[] = {
1625-
"/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp"
1626-
};
1627-
const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]);
1628-
1629-
for (int32_t i = 0; i < num_candidates && !file_created; i++) {
1630-
snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid());
1631-
int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755);
1632-
if (fd < 0) {
1633-
DEV_INFO(
1634-
"Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno
1635-
);
1636-
continue;
1637-
}
1638-
ssize_t written = write(fd, so_data, so_size);
1639-
close(fd);
1640-
if (written != static_cast<ssize_t>(so_size)) {
1641-
DEV_INFO(
1642-
"Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno
1643-
);
1644-
unlink(so_path);
1645-
continue;
1646-
}
1647-
file_created = true;
1648-
DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size);
1649-
}
1625+
void *handle = nullptr;
1626+
int memfd = -1;
16501627

1651-
if (!file_created) {
1652-
DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx);
1653-
return -1;
1628+
// Attempt memfd-based loading first
1629+
int memfd_rc = load_orchestration_so_with_memfd(
1630+
so_data, so_size, thread_idx, &handle, so_path, &memfd
1631+
);
1632+
1633+
if (memfd_rc == 0 && handle != nullptr) {
1634+
// memfd loading succeeded, use memfd-loaded handle
1635+
orch_so_memfd_ = memfd;
16541636
}
16551637

1656-
dlerror();
1657-
void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL);
1658-
const char *dlopen_err = dlerror();
16591638
if (handle == nullptr) {
1660-
DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown");
1661-
unlink(so_path);
1662-
return -1;
1639+
// memfd failed or unavailable - use file-based loading
1640+
orch_so_memfd_ = -1;
1641+
1642+
// Try multiple paths that may allow execution on AICPU
1643+
bool file_created = false;
1644+
const char *candidate_dirs[] = {
1645+
"/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp"
1646+
};
1647+
const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]);
1648+
1649+
for (int32_t i = 0; i < num_candidates && !file_created; i++) {
1650+
snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid());
1651+
int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755);
1652+
if (fd < 0) {
1653+
DEV_INFO(
1654+
"Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno
1655+
);
1656+
continue;
1657+
}
1658+
ssize_t written = write(fd, so_data, so_size);
1659+
close(fd);
1660+
1661+
if (written != static_cast<ssize_t>(so_size)) {
1662+
DEV_INFO(
1663+
"Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno
1664+
);
1665+
unlink(so_path);
1666+
continue;
1667+
}
1668+
file_created = true;
1669+
DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size);
1670+
}
1671+
1672+
if (!file_created) {
1673+
DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx);
1674+
return -1;
1675+
}
1676+
1677+
dlerror();
1678+
handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL);
1679+
const char *dlopen_err = dlerror();
1680+
1681+
if (handle == nullptr) {
1682+
DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown");
1683+
unlink(so_path);
1684+
return -1;
1685+
}
1686+
DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle);
16631687
}
1664-
DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle);
16651688

16661689
dlerror();
16671690
auto config_func =
@@ -2039,8 +2062,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) {
20392062
// Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path)
20402063
if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) {
20412064
pto2_runtime_destroy(rt);
2042-
dlclose(orch_so_handle_);
2043-
unlink(orch_so_path_);
2065+
// Handle cleanup based on loading method
2066+
if (orch_so_memfd_ >= 0) {
2067+
// memfd-based: close fd AFTER dlclose
2068+
cleanup_memfd_so(orch_so_memfd_, orch_so_handle_);
2069+
} else {
2070+
// File-based: dlclose handle and unlink file
2071+
dlclose(orch_so_handle_);
2072+
unlink(orch_so_path_);
2073+
}
20442074
}
20452075
DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx);
20462076
}
@@ -2100,6 +2130,7 @@ void AicpuExecutor::deinit(Runtime *runtime) {
21002130
orch_args_cached_ = nullptr;
21012131
orch_so_handle_ = nullptr;
21022132
orch_so_path_[0] = '\0';
2133+
orch_so_memfd_ = -1;
21032134

21042135
// Reset register-related state
21052136
for (int32_t i = 0; i < MAX_CORES_PER_THREAD; i++) {
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright (c) PyPTO Contributors.
3+
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
4+
* CANN Open Software License Agreement Version 2.0 (the "License").
5+
* Please refer to the License for details. You may not use this file except in compliance with the License.
6+
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
7+
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
8+
* See LICENSE in the root of the software repository for the full text of the License.
9+
* -----------------------------------------------------------------------------------------------------------
10+
*/
11+
12+
/**
13+
* @file memfd_loader.h
14+
* @brief Memory file descriptor based SO loading for AICPU environment
15+
*/
16+
17+
// Enable GNU extensions for memfd_create and MFD_CLOEXEC
18+
#ifndef _GNU_SOURCE
19+
#define _GNU_SOURCE
20+
#endif
21+
22+
#ifndef MEMFD_LOADER_H
23+
#define MEMFD_LOADER_H
24+
25+
#ifdef __cplusplus
26+
extern "C" {
27+
#endif
28+
29+
#include <dlfcn.h>
30+
#include <fcntl.h>
31+
#include <unistd.h>
32+
#include <sys/mman.h>
33+
#include <cstring>
34+
#include <cstdio>
35+
36+
#include "aicpu/device_log.h"
37+
38+
/**
 * Load an orchestration shared object from a memory buffer through a memfd.
 *
 * The image is copied into an anonymous memfd, exposed under a regular-looking
 * path by a temporary symlink to /proc/self/fd/<fd>, and opened with dlopen().
 * The symlink is removed again before this function returns, whether or not
 * dlopen() succeeded.
 *
 * @param so_data          Pointer to the SO image; must not be null.
 * @param so_size          Size of the image in bytes; must be non-zero.
 * @param orch_thread_num  Caller-supplied index; only used to make the symlink name unique.
 * @param out_handle       Receives the dlopen() handle on success, nullptr on failure.
 * @param out_so_path      Caller buffer of at least 256 bytes. Receives the (already
 *                         unlinked) symlink path on success, an empty string on failure.
 * @param out_memfd        Receives the open memfd on success, -1 on failure. On success
 *                         the caller owns the descriptor and must close it after
 *                         dlclose(), e.g. via cleanup_memfd_so().
 * @return 0 on success, -1 on any failure (invalid argument, memfd_create/write/lseek/
 *         symlink/dlopen error). On failure no descriptor or symlink is left behind.
 */
static inline int load_orchestration_so_with_memfd(
    const void *so_data,
    size_t so_size,
    int orch_thread_num,
    void **out_handle,
    char *out_so_path,
    int *out_memfd
) {
    // Capacity the caller is required to provide for out_so_path.
    enum { kSoPathCapacity = 256 };

    // Reject missing output slots instead of dereferencing them.
    if (out_handle == nullptr || out_so_path == nullptr || out_memfd == nullptr) {
        return -1;
    }

    *out_handle = nullptr;
    *out_memfd = -1;
    out_so_path[0] = '\0';

    if (so_data == nullptr || so_size == 0) {
        return -1;
    }

    int fd = memfd_create("libdevice_orch", MFD_CLOEXEC);
    if (fd < 0) {
        return -1;
    }

    // write() may transfer fewer bytes than requested, so keep going until the
    // whole image is stored. A zero-byte result is treated as an error so the
    // loop cannot spin forever.
    const char *cursor = static_cast<const char *>(so_data);
    size_t remaining = so_size;
    while (remaining > 0) {
        ssize_t written = write(fd, cursor, remaining);
        if (written <= 0) {
            close(fd);
            return -1;
        }
        cursor += written;
        remaining -= static_cast<size_t>(written);
    }

    // Rewind so the descriptor reads from the start of the image.
    if (lseek(fd, 0, SEEK_SET) < 0) {
        close(fd);
        return -1;
    }

    // Target of the symlink: the memfd as seen through procfs.
    char proc_fd_path[64];
    snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%d", fd);

    // Give the memfd a "normal" looking path. This bypasses the AICPU dynamic
    // linker's issue with /proc/self/fd/N paths.
    char link_path[kSoPathCapacity];
    snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num);

    // A leftover entry with the same name (e.g. from a crashed process whose pid
    // was reused) would make symlink() fail; remove it first. The result is
    // ignored on purpose: the usual case is that nothing exists.
    unlink(link_path);
    if (symlink(proc_fd_path, link_path) != 0) {
        close(fd);
        return -1;
    }

    dlerror();  // clear any pending loader error state
    void *handle = dlopen(link_path, RTLD_LAZY | RTLD_LOCAL);

    // The symlink is only needed while dlopen() resolves the path.
    unlink(link_path);

    if (handle == nullptr) {
        close(fd);
        return -1;
    }

    // Publish results only once everything has succeeded.
    snprintf(out_so_path, kSoPathCapacity, "%s", link_path);
    *out_handle = handle;
    *out_memfd = fd;
    return 0;
}
112+
113+
/**
 * Release the resources of a memfd-loaded SO.
 *
 * The library handle is closed first and the backing descriptor second.
 * Either argument may be "absent" and is then skipped.
 *
 * @param memfd   Descriptor to close; negative values are ignored.
 * @param handle  dlopen() handle to close; nullptr is ignored.
 */
static inline void cleanup_memfd_so(int memfd, void *handle) {
    const bool has_handle = (handle != nullptr);
    const bool has_fd = (memfd >= 0);

    // Unload the library before dropping the descriptor that backs it.
    if (has_handle) {
        dlclose(handle);
    }

    if (has_fd) {
        close(memfd);
    }
}
124+
125+
#ifdef __cplusplus
126+
}
127+
#endif
128+
129+
#endif // MEMFD_LOADER_H

0 commit comments

Comments
 (0)