|
4 | 4 |
|
5 | 5 | #include <pybind11/pybind11.h> |
6 | 6 | #include <pybind11/stl.h> |
| 7 | +#include <dlfcn.h> |
7 | 8 | #include "numpycpp/numpy_py.h" |
8 | 9 |
|
9 | 10 | namespace py = pybind11; |
@@ -40,6 +41,16 @@ namespace py = pybind11; |
40 | 41 | PYBIND11_MODULE(numpycpp, m) { |
41 | 42 | m.doc() = "C++ pixel-level alignment of Python numpy, powered by Eigen"; |
42 | 43 |
|
| 44 | + // -- eager init: 预加载 g_svml_handle,加速首次 dlsym 调用。 |
| 45 | + // resolve_svml() 自身也是 fork 安全的(pid 检测),这里只是预热。-- |
| 46 | + { |
| 47 | + std::string path = numpy::detail::find_umath_path(); |
| 48 | + if (!path.empty()) { |
| 49 | + numpy::detail::g_svml_handle = dlopen(path.c_str(), RTLD_NOLOAD | RTLD_LAZY); |
| 50 | + numpy::detail::g_svml_pid = getpid(); |
| 51 | + } |
| 52 | + } |
| 53 | + |
43 | 54 | // -- 编译模式报告 ---------------------------------------------------------- |
44 | 55 | m.def("compile_mode", []() -> const char* { |
45 | 56 | #ifdef NUMPYCPP_STD_ONLY |
@@ -78,6 +89,39 @@ PYBIND11_MODULE(numpycpp, m) { |
78 | 89 | return result; |
79 | 90 | }); |
80 | 91 |
|
| 92 | + // -- atan2 dlsym 诊断 ---------------------------------------------------- |
| 93 | + m.def("_diag_atan2_dlsym", []() -> py::dict { |
| 94 | + py::dict r; |
| 95 | + std::string umath_path = numpy::detail::find_umath_path(); |
| 96 | + r["umath_path"] = umath_path.empty() ? "(null)" : umath_path; |
| 97 | + if (!umath_path.empty()) { |
| 98 | + void* h = dlopen(umath_path.c_str(), RTLD_NOLOAD | RTLD_LAZY); |
| 99 | + r["dlopen_manual_ok"] = (h != nullptr); |
| 100 | + if (!h) r["dlerror"] = dlerror(); |
| 101 | + numpy::detail::g_svml_handle = h; |
| 102 | + } |
| 103 | + r["handle_ok"] = (numpy::detail::g_svml_handle != nullptr); |
| 104 | + // 尝试解析各种符号 |
| 105 | + void* sym_npy = numpy::detail::g_svml_handle ? |
| 106 | + dlsym(numpy::detail::g_svml_handle, "npy_atan2") : nullptr; |
| 107 | + void* sym_svml = numpy::detail::g_svml_handle ? |
| 108 | + dlsym(numpy::detail::g_svml_handle, "__svml_atan28") : nullptr; |
| 109 | + void* sym_std = dlsym(RTLD_DEFAULT, "atan2"); |
| 110 | + r["npy_atan2_ok"] = (sym_npy != nullptr); |
| 111 | + r["svml_atan28_ok"] = (sym_svml != nullptr); |
| 112 | + r["std_atan2_ok"] = (sym_std != nullptr); |
| 113 | + // 实际调用 npy_atan2 |
| 114 | + if (sym_npy) { |
| 115 | + auto fn = (double (*)(double,double))sym_npy; |
| 116 | + r["npy_test_val"] = fn(4.7294, 3.5340); |
| 117 | + } |
| 118 | + if (sym_std) { |
| 119 | + auto fn = (double (*)(double,double))sym_std; |
| 120 | + r["std_test_val"] = fn(4.7294, 3.5340); |
| 121 | + } |
| 122 | + return r; |
| 123 | + }); |
| 124 | + |
81 | 125 | // -- linalg submodule -------------------------------------------------- |
82 | 126 | py::module_ la = m.def_submodule("linalg", "numpy.linalg equivalents"); |
83 | 127 | la.def("norm", static_cast<float(*)(const py::array_t<float>&)>(&numpy::linalg::norm)); |
|
0 commit comments