|
| 1 | +#include "pyscheduler/library_export.hpp" |
| 2 | +#include <cuda_runtime.h> |
| 3 | +#include <dlpack.h> |
| 4 | +#include <memory> |
| 5 | + |
// Memory space in which a tensor's element buffer is allocated.
enum class DeviceType {
    CPU  = 0,  // host heap (allocated with new[])
    CUDA = 1,  // CUDA device memory (allocated with cudaMalloc)
};
| 10 | + |
// Maps a C++ element type to its DLPack DLDataType descriptor
// (type code, bits per lane, lanes). The primary template is left
// undefined on purpose: using an unmapped element type fails at
// compile time instead of silently producing a wrong dtype.
template <typename T>
struct DLPackTypeTraits;

// NOTE: If you get a compile-time error here, add a specialization
// for your element type below.

template <>
struct DLPackTypeTraits<float> {
    static constexpr DLDataType dtype = { kDLFloat, 32, 1 };
};

template <>
struct DLPackTypeTraits<double> {
    static constexpr DLDataType dtype = { kDLFloat, 64, 1 };
};

template <>
struct DLPackTypeTraits<int64_t> {
    static constexpr DLDataType dtype = { kDLInt, 64, 1 };
};

template <>
struct DLPackTypeTraits<int32_t> {
    static constexpr DLDataType dtype = { kDLInt, 32, 1 };
};

template <>
struct DLPackTypeTraits<uint8_t> {
    static constexpr DLDataType dtype = { kDLUInt, 8, 1 };
};
| 41 | + |
| 42 | +template <DeviceType Device, typename DataType, size_t... Dims> |
| 43 | +std::unique_ptr<DLManagedTensor> createDlpackTensor() { |
| 44 | + constexpr int ndim = sizeof...(Dims); |
| 45 | + constexpr int64_t num_items = (... * Dims); // C++17 fold expression |
| 46 | + |
| 47 | + // Allocate and set shape |
| 48 | + int64_t* shape = new int64_t[ndim]{ Dims... }; |
| 49 | + |
| 50 | + // Allocate tensor memory |
| 51 | + DataType* data; |
| 52 | + if constexpr(Device == DeviceType::CPU) { |
| 53 | + data = new T[num_items]; |
| 54 | + } else if constexpr(Device == DeviceType::CUDA) { |
| 55 | + cudaMalloc(&data, num_items * sizeof(T)); |
| 56 | + } |
| 57 | + |
| 58 | + // Create DLManagedTensor |
| 59 | + DLManagedTensor* managed_tensor = new DLManagedTensor(); |
| 60 | + managed_tensor->dl_tensor.data = data; |
| 61 | + managed_tensor->dl_tensor.device = { Device == DeviceType::CPU ? kDLCPU : kDLCUDA, 0 }; |
| 62 | + managed_tensor->dl_tensor.ndim = ndim; |
| 63 | + managed_tensor->dl_tensor.dtype = DLPackTypeTraits<DataType>::dtype; |
| 64 | + |
| 65 | + managed_tensor->dl_tensor.shape = shape; |
| 66 | + managed_tensor->dl_tensor.strides = nullptr; |
| 67 | + managed_tensor->dl_tensor.byte_offset = 0; |
| 68 | + managed_tensor->dl_tensor.shape = shape; |
| 69 | + managed_tensor->manager_ctx = nullptr; |
| 70 | + |
| 71 | + tensor->deleter = [](DLManagedTensor* self) { |
| 72 | + if(if constexpr Device == DeviceType::GPU) |
| 73 | + cudaFree(self->dl_tensor.data); |
| 74 | + else |
| 75 | + delete[] static_cast<DataType*>(self->dl_tensor.data); |
| 76 | + delete[] self->dl_tensor.shape; |
| 77 | + delete self; |
| 78 | + }; |
| 79 | + |
| 80 | + return std::unique_ptr<DLManagedTensor>(managed_tensor); |
| 81 | +} |
0 commit comments