|
28 | 28 | #include "pto2_dispatch_payload.h" |
29 | 29 | #include "runtime.h" |
30 | 30 | #include "spin_hint.h" |
| 31 | +#include "memfd_loader.h" |
31 | 32 |
|
32 | 33 | // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) |
33 | 34 | #include "pto_runtime2.h" |
@@ -241,6 +242,7 @@ struct AicpuExecutor { |
241 | 242 | // Orchestration SO handle - defer dlclose until all tasks complete |
242 | 243 | void *orch_so_handle_{nullptr}; |
243 | 244 | char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup |
| 245 | + int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based) |
244 | 246 |
|
245 | 247 | // Shared orchestration function pointer (loaded by first orch thread, used by all) |
246 | 248 | DeviceOrchestrationFunc orch_func_{nullptr}; |
@@ -1618,50 +1620,71 @@ int32_t AicpuExecutor::run(Runtime *runtime) { |
1618 | 1620 | return -1; |
1619 | 1621 | } |
1620 | 1622 |
|
1621 | | - // Try multiple paths that may allow execution on AICPU |
| 1623 | + // Try memfd first, fall back to file-based |
1622 | 1624 | char so_path[256]; |
1623 | | - bool file_created = false; |
1624 | | - const char *candidate_dirs[] = { |
1625 | | - "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" |
1626 | | - }; |
1627 | | - const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); |
1628 | | - |
1629 | | - for (int32_t i = 0; i < num_candidates && !file_created; i++) { |
1630 | | - snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); |
1631 | | - int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); |
1632 | | - if (fd < 0) { |
1633 | | - DEV_INFO( |
1634 | | - "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno |
1635 | | - ); |
1636 | | - continue; |
1637 | | - } |
1638 | | - ssize_t written = write(fd, so_data, so_size); |
1639 | | - close(fd); |
1640 | | - if (written != static_cast<ssize_t>(so_size)) { |
1641 | | - DEV_INFO( |
1642 | | - "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno |
1643 | | - ); |
1644 | | - unlink(so_path); |
1645 | | - continue; |
1646 | | - } |
1647 | | - file_created = true; |
1648 | | - DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); |
1649 | | - } |
| 1625 | + void *handle = nullptr; |
| 1626 | + int memfd = -1; |
1650 | 1627 |
|
1651 | | - if (!file_created) { |
1652 | | - DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); |
1653 | | - return -1; |
| 1628 | + // Attempt memfd-based loading first |
| 1629 | + int memfd_rc = load_orchestration_so_with_memfd( |
| 1630 | + so_data, so_size, thread_idx, &handle, so_path, &memfd |
| 1631 | + ); |
| 1632 | + |
| 1633 | + if (memfd_rc == 0 && handle != nullptr) { |
| 1634 | + // memfd loading succeeded, use memfd-loaded handle |
| 1635 | + orch_so_memfd_ = memfd; |
1654 | 1636 | } |
1655 | 1637 |
|
1656 | | - dlerror(); |
1657 | | - void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); |
1658 | | - const char *dlopen_err = dlerror(); |
1659 | 1638 | if (handle == nullptr) { |
1660 | | - DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); |
1661 | | - unlink(so_path); |
1662 | | - return -1; |
| 1639 | + // memfd failed or unavailable - use file-based loading |
| 1640 | + orch_so_memfd_ = -1; |
| 1641 | + |
| 1642 | + // Try multiple paths that may allow execution on AICPU |
| 1643 | + bool file_created = false; |
| 1644 | + const char *candidate_dirs[] = { |
| 1645 | + "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" |
| 1646 | + }; |
| 1647 | + const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); |
| 1648 | + |
| 1649 | + for (int32_t i = 0; i < num_candidates && !file_created; i++) { |
| 1650 | + snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); |
| 1651 | + int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); |
| 1652 | + if (fd < 0) { |
| 1653 | + DEV_INFO( |
| 1654 | + "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno |
| 1655 | + ); |
| 1656 | + continue; |
| 1657 | + } |
| 1658 | + ssize_t written = write(fd, so_data, so_size); |
| 1659 | + close(fd); |
| 1660 | + |
| 1661 | + if (written != static_cast<ssize_t>(so_size)) { |
| 1662 | + DEV_INFO( |
| 1663 | + "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno |
| 1664 | + ); |
| 1665 | + unlink(so_path); |
| 1666 | + continue; |
| 1667 | + } |
| 1668 | + file_created = true; |
| 1669 | + DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); |
| 1670 | + } |
| 1671 | + |
| 1672 | + if (!file_created) { |
| 1673 | + DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); |
| 1674 | + return -1; |
| 1675 | + } |
| 1676 | + |
| 1677 | + dlerror(); |
| 1678 | + handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); |
| 1679 | + const char *dlopen_err = dlerror(); |
| 1680 | + |
| 1681 | + if (handle == nullptr) { |
| 1682 | + DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); |
| 1683 | + unlink(so_path); |
| 1684 | + return -1; |
| 1685 | + } |
| 1686 | + DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); |
1663 | 1687 | } |
1664 | | - DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); |
1665 | 1688 |
|
1666 | 1689 | dlerror(); |
1667 | 1690 | auto config_func = |
@@ -2039,8 +2062,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) { |
2039 | 2062 | // Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path) |
2040 | 2063 | if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { |
2041 | 2064 | pto2_runtime_destroy(rt); |
2042 | | - dlclose(orch_so_handle_); |
2043 | | - unlink(orch_so_path_); |
| 2065 | + // Handle cleanup based on loading method |
| 2066 | + if (orch_so_memfd_ >= 0) { |
| 2067 | + // memfd-based: close fd AFTER dlclose |
| 2068 | + cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); |
| 2069 | + } else { |
| 2070 | + // File-based: dlclose handle and unlink file |
| 2071 | + dlclose(orch_so_handle_); |
| 2072 | + unlink(orch_so_path_); |
| 2073 | + } |
2044 | 2074 | } |
2045 | 2075 | DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx); |
2046 | 2076 | } |
@@ -2100,6 +2130,7 @@ void AicpuExecutor::deinit(Runtime *runtime) { |
2100 | 2130 | orch_args_cached_ = nullptr; |
2101 | 2131 | orch_so_handle_ = nullptr; |
2102 | 2132 | orch_so_path_[0] = '\0'; |
| 2133 | + orch_so_memfd_ = -1; |
2103 | 2134 |
|
2104 | 2135 | // Reset register-related state |
2105 | 2136 | for (int32_t i = 0; i < MAX_CORES_PER_THREAD; i++) { |
|
0 commit comments