Skip to content

Commit 15ff448

Browse files
authored
multi function invoke (#22)
* move concurrent queue to cmake fetch * add tensor movement test * fix asan, tsan bugs
1 parent ff96179 commit 15ff448

10 files changed

Lines changed: 358 additions & 163 deletions

File tree

CMakeLists.txt

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,71 @@
1-
cmake_minimum_required(VERSION 3.14)
1+
cmake_minimum_required(VERSION 3.27)
22
cmake_policy(SET CMP0148 NEW)
33
project(pyscheduler VERSION 0.1.0 LANGUAGES CXX)
44

55
set(CMAKE_CXX_STANDARD 17)
6+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
7+
# Fetch concurrentqueue (cameron314/concurrentqueue) at configure time and add
# it to the build, which makes the `concurrentqueue` target linked below
# available.
#
# GIT_SHALLOW avoids cloning the full history on every fresh configure; it is
# valid here because GIT_TAG names a branch rather than a commit hash.
#
# NOTE(review): GIT_TAG tracks the moving `master` branch, so two configures at
# different times may build against different upstream code. Consider pinning a
# release tag or commit hash once one has been validated with this project.
include(FetchContent)
FetchContent_Declare(
    concurrentqueue
    GIT_REPOSITORY https://github.com/cameron314/concurrentqueue.git
    GIT_TAG master
    GIT_SHALLOW TRUE
)
FetchContent_MakeAvailable(concurrentqueue)
616

717
find_package(pybind11 CONFIG REQUIRED)
8-
find_package(concurrentqueue CONFIG REQUIRED)
18+
find_package(Threads REQUIRED)
919

1020
# create library target
1121
add_library(${PROJECT_NAME} INTERFACE)
1222
target_link_libraries(${PROJECT_NAME} INTERFACE
1323
pybind11::embed
14-
concurrentqueue::concurrentqueue
24+
concurrentqueue
25+
Threads::Threads
1526
)
1627
target_include_directories(${PROJECT_NAME} INTERFACE
1728
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
1829
$<INSTALL_INTERFACE:include>
1930
)
2031

# Diagnostic flags
#
# Every switch defaults to OFF. The flags are applied with directory scope
# (add_compile_options / add_link_options), so they affect targets created
# after this point in this directory and its subdirectories. They are only
# honoured for GCC- and Clang-family compilers; any other compiler gets a
# configure-time warning instead of silently dropping the request.
option(ENABLE_GPROF "Enable gprof profiling flags (-pg)" OFF)
option(ENABLE_FP "Enable frame pointer for flamegraph generation" OFF)
option(ENABLE_ASAN "Enable AddressSanitizer" OFF)
option(ENABLE_TSAN "Enable ThreadSanitizer" OFF)

# Reject the unsupported sanitizer combination before any flag is added.
if (ENABLE_ASAN AND ENABLE_TSAN)
    message(FATAL_ERROR "ASan and TSan cannot be enabled simultaneously.")
endif()

if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    if (ENABLE_GPROF)
        message(STATUS "Enabling gprof (-pg)")
        # -pg is required both when compiling and when linking.
        add_compile_options(-pg)
        add_link_options(-pg)
    endif()

    if (ENABLE_FP)
        message(STATUS "Enabling frame pointer (-fno-omit-frame-pointer)")
        # -g is added for every build type so that profiles of optimized
        # builds still carry debug information. The former per-build-type
        # branch only repeated this same flag and has been removed.
        add_compile_options(-fno-omit-frame-pointer -g)
    endif()

    if (ENABLE_ASAN)
        message(STATUS "Using AddressSanitizer")
        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
        add_link_options(-fsanitize=address)
    endif()

    if (ENABLE_TSAN)
        message(STATUS "Using ThreadSanitizer")
        add_compile_options(-fsanitize=thread -fno-omit-frame-pointer)
        add_link_options(-fsanitize=thread)
    endif()
elseif (ENABLE_GPROF OR ENABLE_FP OR ENABLE_ASAN OR ENABLE_TSAN)
    message(WARNING
        "Diagnostic options (ENABLE_GPROF/ENABLE_FP/ENABLE_ASAN/ENABLE_TSAN) "
        "are only supported with GNU or Clang compilers; ignoring them for "
        "${CMAKE_CXX_COMPILER_ID}.")
endif()

2169
# Build example programs
2270
option(BUILD_EXAMPLES "Build example programs" OFF)
2371
if(BUILD_EXAMPLES)

configure.sh

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ BUILD_EXAMPLES=OFF
99
BUILD_TESTS=OFF
1010
PYTHON_UDL_INTERFACE_PREFIX="${PYTHON_UDL_INTERFACE_PREFIX:-/usr/local}"
1111

12+
ENABLE_GPROF=OFF
13+
ENABLE_ASAN=OFF
14+
ENABLE_TSAN=OFF
15+
ENABLE_FP=OFF
16+
1217
# Parse arguments
1318
while [[ "$#" -gt 0 ]]; do
1419
case $1 in
@@ -30,13 +35,29 @@ while [[ "$#" -gt 0 ]]; do
3035
PYTHON_UDL_INTERFACE_PREFIX="$2"
3136
shift
3237
;;
38+
--gprof)
39+
ENABLE_GPROF=ON
40+
;;
41+
--asan)
42+
ENABLE_ASAN=ON
43+
;;
44+
--tsan)
45+
ENABLE_TSAN=ON
46+
;;
47+
--flame)
48+
ENABLE_FP=ON
49+
;;
3350
-h | --help)
3451
echo "Usage: ./configure.sh [options]"
35-
echo " -m | --mode : Build type (default: Debug)"
52+
echo " -m | --mode : Build type (Debug/Release/RelWithDebInfo)"
3653
echo " -d | --dir : Build directory (default: build)"
37-
echo " -e | --examples : Enable building examples (default: OFF)"
38-
echo " -t | --tests : Enable building tests (default: OFF)"
39-
echo " -p | --prefix : Install prefix (default: /usr/local or \$PYTHON_UDL_INTERFACE_PREFIX)"
54+
echo " -e | --examples : Enable building examples"
55+
echo " -t | --tests : Enable building tests"
56+
echo " -p | --prefix : Install prefix"
57+
echo " --gprof : Enable gprof profiling"
58+
echo " --asan : Enable AddressSanitizer"
59+
echo " --tsan : Enable ThreadSanitizer"
60+
echo " --flame : Enable frame pointer for flamegraph generation"
4061
exit 0
4162
;;
4263
*)
@@ -47,12 +68,19 @@ while [[ "$#" -gt 0 ]]; do
4768
shift
4869
done
4970

71+
# Suffix the build directory with the build type, then print a summary
72+
BUILD_DIR="${BUILD_DIR}-${BUILD_TYPE}"
73+
5074
echo "Configuring project..."
5175
echo " Build type : $BUILD_TYPE"
5276
echo " Build dir : $BUILD_DIR"
5377
echo " Build examples : $BUILD_EXAMPLES"
5478
echo " Build tests : $BUILD_TESTS"
5579
echo " Install prefix : $PYTHON_UDL_INTERFACE_PREFIX"
80+
echo " ENABLE_GPROF : $ENABLE_GPROF"
81+
echo " ENABLE_ASAN : $ENABLE_ASAN"
82+
echo " ENABLE_TSAN : $ENABLE_TSAN"
83+
echo " ENABLE_FP : $ENABLE_FP"
5684

5785
# Create build directory
5886
mkdir -p "$BUILD_DIR"
@@ -63,6 +91,10 @@ cmake -B "$BUILD_DIR" \
6391
-DCMAKE_INSTALL_PREFIX="$PYTHON_UDL_INTERFACE_PREFIX" \
6492
-DBUILD_EXAMPLES="$BUILD_EXAMPLES" \
6593
-DBUILD_TESTS="$BUILD_TESTS" \
94+
-DENABLE_GPROF="$ENABLE_GPROF" \
95+
-DENABLE_ASAN="$ENABLE_ASAN" \
96+
-DENABLE_TSAN="$ENABLE_TSAN" \
97+
-DENABLE_FP="$ENABLE_FP" \
6698
-S .
6799

68100
echo "Configuration complete. You can now run: cmake --build $BUILD_DIR"

examples/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1-
add_executable(add_example add/main.cpp)
2-
target_link_libraries(add_example PRIVATE pyscheduler)
1+
add_executable(add add/main.cpp)
2+
target_link_libraries(add PRIVATE pyscheduler)
3+
4+
add_executable(tensor_share tensor_share/main.cpp)
5+
target_link_libraries(tensor_share PRIVATE pyscheduler)

examples/add/main.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ int main() {
66
pyscheduler::PyManager manager;
77
pyscheduler::PyManager::InvokeHandler add =
88
manager.loadPythonModule("examples.add.python_modules.add", "invoke");
9-
std::cout << add.invoke<int64_t>(3000, -1234) << std::endl;
9+
for (int i = 0; i < 200; i++) {
10+
std::cout << add.invoke<int64_t>(3000, -1234) << "\n";
11+
}
12+
std::cout.flush();
1013
return 0;
1114
}

examples/add/python_modules/tensor_mul.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

examples/tensor_share/main.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
// Example: generate pairs of tensors in Python, hand them back to Python for a
// multiply-and-sum, and report how long the queued multiply phase takes.
#include <chrono>
#include <future>
#include <iostream>
#include <utility>
#include <vector>

#include <pyscheduler/pyscheduler.hpp>

// Number of tensor pairs that are generated and then multiplied.
constexpr int NUM_ITERATIONS = 600;
// Both dimensions passed to the Python `generate_tensor` function.
constexpr int DIM = 100;

// Prints the elapsed time of the multiply phase in microseconds on stdout.
// Tensor generation and module loading are not part of the measurement.
int main() {
    // steady_clock is monotonic, which is what interval measurement needs.
    using Clock = std::chrono::steady_clock;

    pyscheduler::PyManager manager;
    pyscheduler::PyManager::InvokeHandler generator = manager.loadPythonModule(
        "examples.tensor_share.python_modules.tensor_juggler", "generate_tensor");
    pyscheduler::PyManager::InvokeHandler fma = manager.loadPythonModule(
        "examples.tensor_share.python_modules.tensor_juggler", "multiply_sum_tensors");

    // Phase 1: build every input pair up front so that generation cost is
    // excluded from the timed section below.
    std::vector<std::pair<pybind11::object, pybind11::object>> cache;
    cache.reserve(NUM_ITERATIONS);
    for (int i = 0; i < NUM_ITERATIONS; i++) {
        cache.emplace_back(
            generator.invoke([](pybind11::object x) { return x; }, DIM, DIM),
            generator.invoke([](pybind11::object x) { return x; }, DIM, DIM));
    }

    // Phase 2: queue one multiply-sum per pair, then wait for all of them.
    std::vector<std::future<int>> futures;
    futures.reserve(NUM_ITERATIONS);

    const auto module_solve_start = Clock::now();
    for (int i = 0; i < NUM_ITERATIONS; i++) {
        // Move the pair out of the cache so each tensor handle is handed to
        // exactly one queued call.
        auto [a, b] = std::move(cache[i]);
        futures.emplace_back(
            fma.queue_invoke(
                [](pybind11::object x) { return x.cast<int>(); },
                std::move(a),
                std::move(b)));
    }
    for (auto& pending : futures) {
        // get() blocks like wait(), but also rethrows an exception stored in
        // the future, so a failed call is not silently counted as success.
        (void)pending.get();
    }
    const auto module_solve_end = Clock::now();

    std::cout << std::chrono::duration_cast<std::chrono::microseconds>(
                     module_solve_end - module_solve_start)
                     .count()
              << std::endl;

    return 0;
}

examples/tensor_share/main.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
import time

import torch
import torch.utils.dlpack as dlpack


def generate_tensor(length: int, width: int):
    """Create a random float32 tensor and export it through DLPack.

    Args:
        length: Size of the first dimension.
        width: Size of the second dimension.

    Returns:
        The object produced by ``torch.utils.dlpack.to_dlpack`` for a
        ``length`` x ``width`` tensor filled by ``torch.rand``.
    """
    tensor = torch.rand(length, width, dtype=torch.float32)
    return dlpack.to_dlpack(tensor)  # type: ignore


def multiply_sum_tensors(a, b) -> int:
    """Multiply two DLPack-exported tensors element-wise and sum the result.

    Args:
        a: First operand, as returned by ``generate_tensor``.
        b: Second operand, as returned by ``generate_tensor``.

    Returns:
        The sum of the element-wise product, truncated to ``int``.
    """
    a_tensor = dlpack.from_dlpack(a)
    b_tensor = dlpack.from_dlpack(b)
    c_tensor = a_tensor * b_tensor
    d_tensor = torch.sum(c_tensor)
    return int(d_tensor.item())


def main():
    """Time the multiply-sum phase over pre-generated tensor pairs.

    Prints the elapsed time in microseconds. Tensor generation happens before
    the timer starts, so the figure covers the same phase that the C++
    ``tensor_share`` example measures.
    """
    DIM = 100
    NUM_ITERATIONS = 600

    a_tensors = [generate_tensor(DIM, DIM) for _ in range(NUM_ITERATIONS)]
    b_tensors = [generate_tensor(DIM, DIM) for _ in range(NUM_ITERATIONS)]

    start_ns = time.perf_counter_ns()
    for a, b in zip(a_tensors, b_tensors):
        # Only the elapsed time is reported; the result itself is not needed.
        multiply_sum_tensors(a, b)
    end_ns = time.perf_counter_ns()

    # perf_counter_ns() yields nanoseconds; divide by 1e3 for microseconds.
    print(f"Elapsed Time (us): {(end_ns - start_ns) / 1e3}")


if __name__ == "__main__":
    main()
examples/tensor_share/python_modules/tensor_juggler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
import torch
from torch.utils import dlpack


def generate_tensor(length: int, width: int):
    """Return a DLPack export of a random ``length`` x ``width`` float32 tensor."""
    random_values = torch.rand((length, width), dtype=torch.float32)
    return dlpack.to_dlpack(random_values)  # type: ignore


def multiply_sum_tensors(a, b) -> int:
    """Sum the element-wise product of two DLPack-exported tensors.

    Both arguments are imported with ``from_dlpack``; the scalar sum of their
    product is truncated to ``int`` before being returned.
    """
    lhs = dlpack.from_dlpack(a)
    rhs = dlpack.from_dlpack(b)
    total = (lhs * rhs).sum()
    return int(total.item())

0 commit comments

Comments
 (0)