From c3f41610522132b6b5a5e06f641187128071a796 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Sun, 23 Nov 2025 20:57:07 +0500 Subject: [PATCH 01/19] feat(structures): fine-grained lock implementation for unbounded queue The implementation uses 2 mutexes, for head and for tail. Considered easiest among implementation --- .clang-tidy | 4 +- executables/CMakeLists.txt | 4 +- executables/print_hello.cpp | 7 -- executables/unbounded_queue_benchmark.cpp | 67 ++++++++++ multithreading/structures/CMakeLists.txt | 14 +++ .../unbounded_queue/FGLockUnboundedQueue.h | 119 ++++++++++++++++++ .../unbounded_queue/LockFreeUnboundedQueue.h | 12 ++ .../include/unbounded_queue/UnboundedQueue.h | 19 +++ multithreading/structures/readme.md | 25 ++++ .../unbounded_queue/FGLockUnboundedQueue.cpp | 7 ++ .../LockFreeUnboundedQueue.cpp | 6 + .../src/unbounded_queue/UnboundedQueue.cpp | 6 + multithreading/structures/tests/main.cpp | 9 ++ .../utilities/include/Application.cpp | 5 + .../utilities/include/Application.h | 38 ++++++ 15 files changed, 331 insertions(+), 11 deletions(-) delete mode 100644 executables/print_hello.cpp create mode 100644 executables/unbounded_queue_benchmark.cpp create mode 100644 multithreading/structures/CMakeLists.txt create mode 100644 multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h create mode 100644 multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h create mode 100644 multithreading/structures/include/unbounded_queue/UnboundedQueue.h create mode 100644 multithreading/structures/readme.md create mode 100644 multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp create mode 100644 multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp create mode 100644 multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp create mode 100644 multithreading/structures/tests/main.cpp create mode 100644 multithreading/utilities/include/Application.cpp create mode 100644 multithreading/utilities/include/Application.h diff --git a/.clang-tidy b/.clang-tidy index aa6ec12..3e44f1a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,5 +1,5 @@ -Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type' -WarningsAsErrors: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type' +Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace' +WarningsAsErrors: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace' HeaderFilterRegex: '^(?!.*/build/).*' FormatStyle: google CheckOptions: diff --git a/executables/CMakeLists.txt b/executables/CMakeLists.txt index add602d..6144241 100644 --- a/executables/CMakeLists.txt +++ b/executables/CMakeLists.txt @@ -1,2 +1,2 @@ -add_executable(print_hello print_hello.cpp) -target_link_libraries(print_hello PRIVATE utilities cxx_setup) +add_executable(unbounded_queue_benchmark unbounded_queue_benchmark.cpp) +target_link_libraries(unbounded_queue_benchmark PRIVATE utilities cxx_setup) diff --git a/executables/print_hello.cpp b/executables/print_hello.cpp deleted file mode 100644 index 9b78afc..0000000 --- a/executables/print_hello.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include - -auto main() -> int { - multithreading::utilities::Placeholder::SayHello("Nikita"); - - return 0; -} diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp new file mode 100644 index 0000000..378d5f3 --- /dev/null +++ b/executables/unbounded_queue_benchmark.cpp @@ -0,0 +1,67 @@ +#include + +#include +#include + +#include "multithreading/utilities/include/Application.h" + + +static int runQueueMultithreading() { + constexpr size_t PER_THREAD_SIZE = 1000; + constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; + + multithreading::structures::unbounded_queue::FGLockUnboundedQueue queue; + + const size_t threadsCount = std::thread::hardware_concurrency(); + std::vector threads {}; + threads.reserve(threadsCount); + + for (size_t i = 0; i < threadsCount; ++i) { + threads.emplace_back([&queue] { + for (size_t j = 0; j < PER_THREAD_SIZE; j++) { + queue.enqueue(static_cast(j)); + } + }); + } + + for (size_t i = 0; i < PER_THREAD_SIZE * threadsCount; ++i) { + std::future> latestValue = queue.wait_dequeue( + std::chrono::milliseconds(DEQUEUE_TIMEOUT_MS) + ); + + const std::optional value = latestValue.get(); + if (value.has_value()) { + std::cout << "Dequeued at iteration " << i + 1 << "(" << value.value() << ")" << '\n'; + } else { + i--; + } + } + + for (size_t i = 0; i < threadsCount; ++i) { + if (threads[i].joinable()) { + threads[i].join(); + } + } + + return 0; +} + +auto main() -> int { + multithreading::utilities::Application benchmarkApplication( + multithreading::utilities::ApplicationInfo{ + .appName="Unbounded Queue Benchmark", + .appVersion="1.0.0" + } + ); + + const std::optional executionResult = benchmarkApplication.SafeStart( + runQueueMultithreading + ); + + try { + return executionResult.has_value() ? executionResult.value() : -1; + } catch (std::exception& e) { + std::cerr << e.what() << '\n'; + return -1; + } +} \ No newline at end of file diff --git a/multithreading/structures/CMakeLists.txt b/multithreading/structures/CMakeLists.txt new file mode 100644 index 0000000..c280453 --- /dev/null +++ b/multithreading/structures/CMakeLists.txt @@ -0,0 +1,14 @@ +add_library(utilities + src/unbounded_queue/UnboundedQueue.cpp + src/unbounded_queue/FGLockUnboundedQueue.cpp + src/unbounded_queue/LockFreeUnboundedQueue.cpp) + +if (BUILD_TESTING) + add_executable(tests tests/main.cpp) + target_link_libraries(tests PRIVATE GTest::gtest_main) + include(GoogleTest) + gtest_discover_tests(tests) +endif() + +include_directories(include) +target_link_libraries(utilities PUBLIC cxx_setup) \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h new file mode 100644 index 0000000..210167b --- /dev/null +++ b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h @@ -0,0 +1,119 @@ +#pragma once + +#include + +#include "./UnboundedQueue.h" + +namespace multithreading::structures::unbounded_queue { + + template + struct Node { + private: + T value; + Node* nextNode; + public: + explicit Node(const T& value) + : value(value) + , nextNode(nullptr) + {} + + explicit Node(T&& value) + : value(std::move(value)) + , nextNode(nullptr) + {} + + [[nodiscard]] T& get() { + return value; + } + + [[nodiscard]] Node* next() const { + return nextNode; + } + + void setNext(Node* next) { + this->nextNode = next; + } + }; + + template + class FGLockUnboundedQueue : public UnboundedQueue { + private: + mutable std::mutex head_mu; + mutable std::mutex tail_mu; + + std::condition_variable enqueue_condition; + + Node* head; + Node* tail; + + std::optional unsafe_dequeue() { + Node* dummyNode = head; + Node* firstValuableNode = dummyNode->next(); + + if (firstValuableNode == nullptr) { + return std::nullopt; + } + + std::optional result = std::move(firstValuableNode->get()); + head = firstValuableNode; + delete dummyNode; + + return result; + } + public: + FGLockUnboundedQueue() { + head = tail = new Node(T{}); + } + + ~FGLockUnboundedQueue() { + while (FGLockUnboundedQueue::try_dequeue().has_value()) {} + + delete tail; + } + + void enqueue(const T& value) override { + auto* node = new Node(value); + std::lock_guard lock(tail_mu); + + tail->setNext(node); + tail = node; + + enqueue_condition.notify_one(); + } + + void enqueue(T&& value) override { + auto* node = new Node(std::move(value)); + std::lock_guard lock(tail_mu); + + tail->setNext(node); + tail = node; + + enqueue_condition.notify_one(); + } + + std::optional try_dequeue() override { + std::lock_guard lock(head_mu); + + return unsafe_dequeue(); + } + + std::future> wait_dequeue(const std::chrono::milliseconds& timeout) override { + return std::async(std::launch::async, [this, timeout]() -> std::optional { + std::unique_lock lock(head_mu); + + if (!enqueue_condition.wait_for(lock, timeout, [this]() { + return head->next() != nullptr; + })) { + return std::nullopt; + } + + return unsafe_dequeue(); + }); + } + + [[nodiscard]] bool is_empty() const override { + std::lock_guard lock(head_mu); + return head->next() == nullptr; + } + }; +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h new file mode 100644 index 0000000..d091ba3 --- /dev/null +++ b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h @@ -0,0 +1,12 @@ +#pragma once + +#include "./UnboundedQueue.h" + + +namespace multithreading::structures::unbounded_queue { + + template + class LockFreeUnboundedQueue : public UnboundedQueue { + + }; +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h new file mode 100644 index 0000000..73bee69 --- /dev/null +++ b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + + +namespace multithreading::structures::unbounded_queue { + + template + class UnboundedQueue { + public: + virtual std::optional try_dequeue() = 0; + virtual std::future> wait_dequeue(const std::chrono::milliseconds& timeout) = 0; + virtual void enqueue(const T& value) = 0; + virtual void enqueue(T&& value) = 0; + + [[nodiscard]] virtual bool is_empty() const = 0; + }; +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/readme.md b/multithreading/structures/readme.md new file mode 100644 index 0000000..934d670 --- /dev/null +++ b/multithreading/structures/readme.md @@ -0,0 +1,25 @@ +# LW2. Topic: Development of thread-safe data structures +## Task Statement +Develop a thread-safe data structure that supports potentially simultaneous access by multiple threads (readers and writers). The data structure is to be chosen from the provided list. +Develop a pipeline for testing the correctness and performance of the thread-safe data structure. Investigate the performance of the thread-safe data structure. As a baseline implementation for comparison, take the same structure with coarse-grained locking. Compare the performance for different numbers of threads interacting with the data structure, analyze the dependency on the ratio of reader and writer threads. Present performance metrics averaged over runs, and include graphs in the report. + +2.1 Fine-grained locking data structure + +2.2 Lock-free data structure + +## Data structures (choose one) + +**unbounded queue;** +The queue must provide pop and push methods. The pop method must be implemented both in a blocking variant (wait_pop) for the case of an empty queue, and in a non-blocking variant (try_pop). + +**bounded queue;** +The queue must have an upper size limit (the size is defined during initialization). Pop and push methods must have blocking and non-blocking variants. +The 2.2 implementation may be done with preallocated memory and based on an array. + +**singly linked list;** +Must provide insert, delete, find methods + +# From Author +As the task is completed not at the deadline and for learning purposes only, an implementation of **ALL 3** structures +will be present here. The mechanism for running pipeline will be held in the `{root}/{project_name}/utilities` library +as it will be shared across all the Laboratory works and used only in the `executables` itself. diff --git a/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp new file mode 100644 index 0000000..5d4e5d0 --- /dev/null +++ b/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp @@ -0,0 +1,7 @@ +#include "../../include/unbounded_queue/FGLockUnboundedQueue.h" + + +namespace multithreading::structures::unbounded_queue { + + +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp new file mode 100644 index 0000000..2d6649a --- /dev/null +++ b/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp @@ -0,0 +1,6 @@ +#include "../../include/unbounded_queue/LockFreeUnboundedQueue.h" + + +namespace multithreading::structures::unbounded_queue { + +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp new file mode 100644 index 0000000..894ceb4 --- /dev/null +++ b/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp @@ -0,0 +1,6 @@ +#include "../../include/unbounded_queue/UnboundedQueue.h" + + +namespace multithreading::structures::unbounded_queue { + +} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/tests/main.cpp b/multithreading/structures/tests/main.cpp new file mode 100644 index 0000000..4aebf54 --- /dev/null +++ b/multithreading/structures/tests/main.cpp @@ -0,0 +1,9 @@ +#include + +TEST(Group1, Test1) { + EXPECT_EQ(3, 1 + 2); +} + +TEST(Group1, Test2) { + ASSERT_FLOAT_EQ(3.0F, 1.0F + 2.0F); +} \ No newline at end of file diff --git a/multithreading/utilities/include/Application.cpp b/multithreading/utilities/include/Application.cpp new file mode 100644 index 0000000..cc91bfe --- /dev/null +++ b/multithreading/utilities/include/Application.cpp @@ -0,0 +1,5 @@ +// +// Created by Nikita Lapin on 23/11/2025. +// + +#include "Application.h" diff --git a/multithreading/utilities/include/Application.h b/multithreading/utilities/include/Application.h new file mode 100644 index 0000000..15c4e7b --- /dev/null +++ b/multithreading/utilities/include/Application.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace multithreading::utilities { + + struct alignas(64) ApplicationInfo { + public: + std::string appName; + std::string appVersion; + }; + + template + class Application { + private: + ApplicationInfo appInformation; + public: + explicit Application(ApplicationInfo information) + : appInformation(std::move(information)) + {} + + std::optional SafeStart(std::function task) noexcept { + try { + return task(); + } catch (const std::exception& exception) { + const std::string prefix = "Uncaught exception at " + appInformation.appName; + std::cout << prefix << ": " << exception.what() << '\n'; + + return std::nullopt; + } + } + }; + +} // namespace multithreading::utilities From 09605aefc6d995d4f2ca5e5ab4ef2a9f4c91f903 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Tue, 25 Nov 2025 20:33:58 +0500 Subject: [PATCH 02/19] feat(structures): unbounded queue lock-free implementation --- executables/unbounded_queue_benchmark.cpp | 51 +++- multithreading/structures/CMakeLists.txt | 4 +- .../unbounded_queue/FGLockUnboundedQueue.h | 69 +++-- .../unbounded_queue/LockFreeUnboundedQueue.h | 266 ++++++++++++++++++ .../include/unbounded_queue/UnboundedQueue.h | 10 +- multithreading/structures/src/placeholder.cpp | 3 + .../unbounded_queue/FGLockUnboundedQueue.cpp | 7 - .../LockFreeUnboundedQueue.cpp | 6 - .../src/unbounded_queue/UnboundedQueue.cpp | 6 - 9 files changed, 356 insertions(+), 66 deletions(-) create mode 100644 multithreading/structures/src/placeholder.cpp delete mode 100644 multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp delete mode 100644 multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp delete mode 100644 multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index 378d5f3..add4d1f 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -1,17 +1,18 @@ -#include - #include #include -#include "multithreading/utilities/include/Application.h" - +#include +#include +#include +#include -static int runQueueMultithreading() { +template +static int runQueueMultithreading( + multithreading::structures::unbounded_queue::UnboundedQueue* queue +) { constexpr size_t PER_THREAD_SIZE = 1000; constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; - multithreading::structures::unbounded_queue::FGLockUnboundedQueue queue; - const size_t threadsCount = std::thread::hardware_concurrency(); std::vector threads {}; threads.reserve(threadsCount); @@ -19,17 +20,17 @@ static int runQueueMultithreading() { for (size_t i = 0; i < threadsCount; ++i) { threads.emplace_back([&queue] { for (size_t j = 0; j < PER_THREAD_SIZE; j++) { - queue.enqueue(static_cast(j)); + queue->enqueue(static_cast(j)); } }); } for (size_t i = 0; i < PER_THREAD_SIZE * threadsCount; ++i) { - std::future> latestValue = queue.wait_dequeue( + std::future> latestValue = queue->wait_dequeue_async( std::chrono::milliseconds(DEQUEUE_TIMEOUT_MS) ); - const std::optional value = latestValue.get(); + const std::optional value = latestValue.get(); if (value.has_value()) { std::cout << "Dequeued at iteration " << i + 1 << "(" << value.value() << ")" << '\n'; } else { @@ -54,14 +55,40 @@ auto main() -> int { } ); + const std::array< + std::unique_ptr>, 2 + > queues { + std::make_unique>(), + std::make_unique>( + multithreading::structures::unbounded_queue::LockFreeQueueConfig{ + .maxUpdateDepth = 10000 + } + ) + }; + const std::optional executionResult = benchmarkApplication.SafeStart( - runQueueMultithreading + [&]() -> int { + return runQueueMultithreading(queues.at(0).get()); + } + ); + const std::optional executionResult2 = benchmarkApplication.SafeStart( + [&]() -> int { + return runQueueMultithreading(queues.at(1).get()); + } ); try { - return executionResult.has_value() ? executionResult.value() : -1; + if (executionResult.has_value()) { + return executionResult.value(); + } + if (executionResult2.has_value()) { + return executionResult2.value(); + } + + return -1; } catch (std::exception& e) { std::cerr << e.what() << '\n'; + return -1; } } \ No newline at end of file diff --git a/multithreading/structures/CMakeLists.txt b/multithreading/structures/CMakeLists.txt index c280453..79b7a12 100644 --- a/multithreading/structures/CMakeLists.txt +++ b/multithreading/structures/CMakeLists.txt @@ -1,7 +1,5 @@ add_library(utilities - src/unbounded_queue/UnboundedQueue.cpp - src/unbounded_queue/FGLockUnboundedQueue.cpp - src/unbounded_queue/LockFreeUnboundedQueue.cpp) + src/placeholder.cpp) if (BUILD_TESTING) add_executable(tests tests/main.cpp) diff --git a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h index 210167b..79e7c42 100644 --- a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h @@ -7,17 +7,17 @@ namespace multithreading::structures::unbounded_queue { template - struct Node { + struct FGNode { private: T value; - Node* nextNode; + FGNode* nextNode; public: - explicit Node(const T& value) + explicit FGNode(const T& value) : value(value) , nextNode(nullptr) {} - explicit Node(T&& value) + explicit FGNode(T&& value) : value(std::move(value)) , nextNode(nullptr) {} @@ -26,11 +26,11 @@ namespace multithreading::structures::unbounded_queue { return value; } - [[nodiscard]] Node* next() const { + [[nodiscard]] FGNode* next() const { return nextNode; } - void setNext(Node* next) { + void setNext(FGNode* next) { this->nextNode = next; } }; @@ -43,12 +43,19 @@ namespace multithreading::structures::unbounded_queue { std::condition_variable enqueue_condition; - Node* head; - Node* tail; + FGNode* head; + FGNode* tail; + + void unsafe_enqueue(FGNode* node) { + tail->setNext(node); + tail = node; + + enqueue_condition.notify_one(); + } std::optional unsafe_dequeue() { - Node* dummyNode = head; - Node* firstValuableNode = dummyNode->next(); + FGNode* dummyNode = head; + FGNode* firstValuableNode = dummyNode->next(); if (firstValuableNode == nullptr) { return std::nullopt; @@ -62,7 +69,7 @@ namespace multithreading::structures::unbounded_queue { } public: FGLockUnboundedQueue() { - head = tail = new Node(T{}); + head = tail = new FGNode(T{}); } ~FGLockUnboundedQueue() { @@ -72,23 +79,17 @@ namespace multithreading::structures::unbounded_queue { } void enqueue(const T& value) override { - auto* node = new Node(value); + auto* node = new FGNode(value); std::lock_guard lock(tail_mu); - tail->setNext(node); - tail = node; - - enqueue_condition.notify_one(); + unsafe_enqueue(node); } void enqueue(T&& value) override { - auto* node = new Node(std::move(value)); + auto* node = new FGNode(std::move(value)); std::lock_guard lock(tail_mu); - tail->setNext(node); - tail = node; - - enqueue_condition.notify_one(); + unsafe_enqueue(node); } std::optional try_dequeue() override { @@ -97,17 +98,25 @@ namespace multithreading::structures::unbounded_queue { return unsafe_dequeue(); } - std::future> wait_dequeue(const std::chrono::milliseconds& timeout) override { - return std::async(std::launch::async, [this, timeout]() -> std::optional { - std::unique_lock lock(head_mu); + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + std::unique_lock lock(head_mu); + + if (!enqueue_condition.wait_for(lock, timeout, [this]() { + return head->next() != nullptr; + })) { + return std::nullopt; + } - if (!enqueue_condition.wait_for(lock, timeout, [this]() { - return head->next() != nullptr; - })) { - return std::nullopt; - } + return unsafe_dequeue(); + } - return unsafe_dequeue(); + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) override { + return std::async(std::launch::async, [this, timeout]() -> std::optional { + return this->wait_dequeue(timeout); }); } diff --git a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h index d091ba3..4ebdbda 100644 --- a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h @@ -1,12 +1,278 @@ #pragma once +#include + #include "./UnboundedQueue.h" namespace multithreading::structures::unbounded_queue { + struct alignas(8) LockFreeQueueConfig { + size_t maxUpdateDepth; + }; + + template + struct LockFreeNode { + private: + T value; + std::atomic nextNode; + public: + explicit LockFreeNode(const T& value) + : value(value) + , nextNode(nullptr) + {} + + explicit LockFreeNode(T&& value) + : value(std::move(value)) + , nextNode(nullptr) + {} + + LockFreeNode() + : value(T{}) + , nextNode(nullptr) + {} + + [[nodiscard]] T& get() { + return value; + } + + [[nodiscard]] LockFreeNode* next() const { + return nextNode.load(std::memory_order_acquire); + } + + [[nodiscard]] std::atomic& nextAtomic() { + return nextNode; + } + + void setNext(LockFreeNode* next) { + this->nextNode.store(next, std::memory_order_release); + } + }; + + constexpr size_t DEFAULT_MAX_ATTEMPTS = 100; + template class LockFreeUnboundedQueue : public UnboundedQueue { + private: + size_t maxAlgorithmDepth; + + std::atomic*> head; + std::atomic*> tail; + std::counting_semaphore<> items_available; + + void enqueue_node(LockFreeNode* newNode) { + size_t iterator = 0; + + while (iterator < maxAlgorithmDepth) { + iterator++; + LockFreeNode* last = tail.load(std::memory_order_acquire); + LockFreeNode* next = last->next(); + + // If the tail updated between reads, then we want to + // retry reaching real tail in the next iteration + if (last == tail.load()) { + if (next == nullptr) { + LockFreeNode* expected = nullptr; + if (last->nextAtomic().compare_exchange_weak( + expected, + newNode, + std::memory_order_release, + std::memory_order_acquire + )) { + // Write successful, try to update the tail + tail.compare_exchange_weak( + last, + newNode, + std::memory_order_release, + std::memory_order_acquire + ); + return; + } + } else { + // At this point our tail didn't change, but the reference to next + // got updated on tail. So we want to help our tail to be up-to-date. + tail.compare_exchange_weak( + last, + next, + std::memory_order_release, + std::memory_order_acquire + ); + } + } + } + + throw std::runtime_error("Maximum enqueue_node() depth exceeded. It usually means that" + " there are a lot of threads competing at once. Consider" + " manually increasing the 'maxDepth' property in Queue" + " constructor to bypass this bottleneck"); + } + + std::optional dequeue_node() { + size_t iterator = 0; + + while (iterator < maxAlgorithmDepth) { + iterator++; + LockFreeNode* first = head.load(std::memory_order_acquire); + LockFreeNode* last = tail.load(std::memory_order_acquire); + LockFreeNode* firstValuable = first->next(); + + // Check if data is valid and didn't change between reads. If it did - just continue + // and redo the action in the next iteration. + if (first == head.load(std::memory_order_acquire)) { + if (first == last) { + if (firstValuable == nullptr) { + // If our head equals to tail - it means that the queue is empty + // and they reference the dummy node. + return std::nullopt; + } else { + // But if the firstValuable node is not nullptr - it means that tail + // wasn't updated, so its lagging behind. Help advance it + tail.compare_exchange_weak( + last, + firstValuable, + std::memory_order_release, + std::memory_order_acquire + ); + // We ignore if it fails because another thread will simply + // handle it in the future anyways + } + } else { + T value = firstValuable->get(); + + if (head.compare_exchange_weak( + first, + firstValuable, + std::memory_order_release, + std::memory_order_acquire + )) { + // We successfully retrieved the first element and replaced the head + // reference. Now cleaning the references. + delete first; + return value; + } else { + // We lost the race condition to another thread - it dequeued before + // we did. Simply continue the loop to try one more time. + continue; + } + } + } else { + // Consistency check failed, data is invalid - continue the loop to try one + // more time. + continue; + } + } + + throw std::runtime_error("Maximum dequeue_node() depth exceeded. It usually means that" + " there are a lot of threads competing at once. Consider" + " manually increasing the 'maxDepth' property in Queue" + " constructor to bypass this bottleneck"); + } + public: + LockFreeUnboundedQueue() + : maxAlgorithmDepth(DEFAULT_MAX_ATTEMPTS) + , items_available(0) + { + auto* dummy = new LockFreeNode(); + head.store(dummy, std::memory_order_relaxed); + tail.store(dummy, std::memory_order_relaxed); + } + + LockFreeUnboundedQueue(const LockFreeQueueConfig& config) + : maxAlgorithmDepth(config.maxUpdateDepth) + , items_available(0) + { + auto* dummy = new LockFreeNode(); + head.store(dummy, std::memory_order_relaxed); + tail.store(dummy, std::memory_order_relaxed); + } + + ~LockFreeUnboundedQueue() { + while (LockFreeUnboundedQueue::try_dequeue().has_value()) {} + + const LockFreeNode* dummy = head.load(std::memory_order_relaxed); + delete dummy; + } + + void enqueue(const T& value) override { + auto* newNode = new LockFreeNode(value); + enqueue_node(newNode); + + // Tell semaphore we have 1 item released + items_available.release(1); + } + + void enqueue(T&& value) override { + auto* newNode = new LockFreeNode(std::move(value)); + enqueue_node(newNode); + + // Tell semaphore we have 1 item released + items_available.release(1); + } + + std::optional try_dequeue() override { + return dequeue_node(); + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + const auto deadline = std::chrono::steady_clock::now() + timeout; + + // We don't check the time equality inside the while loop as it is ID-based and + // affects performance in a bad way. + while (true) { + const auto now_time = std::chrono::steady_clock::now(); + if (now_time >= deadline) { + // Time limit exceeded, break from cycle. + break; + } + + // Wait for time left after previous iterations. + if (items_available.try_acquire_for(deadline - now_time)) { + // If the value added to semaphore - try to dequeue it. It can still fail + // in a racing condition with common .try_dequeue() callers. + if (auto result = try_dequeue(); result.has_value()) { + // Dequeue succeeded - return the result of operation. + return result; + } + } else { + // Time limit exceeded, break from cycle. + break; + } + } + + // Final attempt to dequeue the value. + return try_dequeue(); + } + + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) override { + return std::async(std::launch::async, [this, timeout]() -> std::optional { + return this->wait_dequeue(timeout); + }); + } + + bool is_empty() const override { + size_t iterator = 0; + + while (iterator < maxAlgorithmDepth) { + iterator++; + + LockFreeNode* first = head.load(std::memory_order_acquire); + LockFreeNode* last = tail.load(std::memory_order_acquire); + + // Memory consistency check. We want to make sure that the first didn't change + // while we were reading last. + if (first == head.load(std::memory_order_acquire)) { + return first == last; + } + } + throw std::runtime_error("Maximum is_empty() depth exceeded. It usually means that" + " there are a lot of threads competing at once. Consider" + " manually increasing the 'maxDepth' property in Queue" + " constructor to bypass this bottleneck"); + } }; } // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h index 73bee69..3beff9b 100644 --- a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace multithreading::structures::unbounded_queue { @@ -9,8 +8,15 @@ namespace multithreading::structures::unbounded_queue { template class UnboundedQueue { public: + virtual ~UnboundedQueue() = default; + virtual std::optional try_dequeue() = 0; - virtual std::future> wait_dequeue(const std::chrono::milliseconds& timeout) = 0; + virtual std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) = 0; + virtual std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) = 0; virtual void enqueue(const T& value) = 0; virtual void enqueue(T&& value) = 0; diff --git a/multithreading/structures/src/placeholder.cpp b/multithreading/structures/src/placeholder.cpp new file mode 100644 index 0000000..701c022 --- /dev/null +++ b/multithreading/structures/src/placeholder.cpp @@ -0,0 +1,3 @@ +#include "../include/unbounded_queue/FGLockUnboundedQueue.h" +#include "../include/unbounded_queue/LockFreeUnboundedQueue.h" +#include "../include/unbounded_queue/UnboundedQueue.h" \ No newline at end of file diff --git a/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp deleted file mode 100644 index 5d4e5d0..0000000 --- a/multithreading/structures/src/unbounded_queue/FGLockUnboundedQueue.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "../../include/unbounded_queue/FGLockUnboundedQueue.h" - - -namespace multithreading::structures::unbounded_queue { - - -} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp deleted file mode 100644 index 2d6649a..0000000 --- a/multithreading/structures/src/unbounded_queue/LockFreeUnboundedQueue.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include "../../include/unbounded_queue/LockFreeUnboundedQueue.h" - - -namespace multithreading::structures::unbounded_queue { - -} // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp b/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp deleted file mode 100644 index 894ceb4..0000000 --- a/multithreading/structures/src/unbounded_queue/UnboundedQueue.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include "../../include/unbounded_queue/UnboundedQueue.h" - - -namespace multithreading::structures::unbounded_queue { - -} // namespace multithreading::structures::unbounded_queue \ No newline at end of file From f826b6dc325a4d82e9e743507f82fe0220ac91b5 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Sun, 30 Nov 2025 20:04:23 +0500 Subject: [PATCH 03/19] feat(structures): bounded queue fine-grained lock implementation --- executables/CMakeLists.txt | 3 + executables/bounded_queue_benchmark.cpp | 82 ++++++ .../include/bounded_queue/BoundedQueue.h | 27 ++ .../bounded_queue/FGLockBoundedQueue.h | 261 ++++++++++++++++++ .../bounded_queue/LockFreeBoundedQueue.h | 59 ++++ .../unbounded_queue/FGLockUnboundedQueue.h | 90 ++++-- .../unbounded_queue/LockFreeUnboundedQueue.h | 103 ++++--- .../include/unbounded_queue/UnboundedQueue.h | 2 +- .../utilities/include/Application.h | 2 +- 9 files changed, 567 insertions(+), 62 deletions(-) create mode 100644 executables/bounded_queue_benchmark.cpp create mode 100644 multithreading/structures/include/bounded_queue/BoundedQueue.h create mode 100644 multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h create mode 100644 multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h diff --git a/executables/CMakeLists.txt b/executables/CMakeLists.txt index 6144241..9f55ffb 100644 --- a/executables/CMakeLists.txt +++ b/executables/CMakeLists.txt @@ -1,2 +1,5 @@ add_executable(unbounded_queue_benchmark unbounded_queue_benchmark.cpp) +add_executable(bounded_queue_benchmark bounded_queue_benchmark.cpp) + target_link_libraries(unbounded_queue_benchmark PRIVATE utilities cxx_setup) +target_link_libraries(bounded_queue_benchmark PRIVATE utilities cxx_setup) \ No newline at end of file diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp new file mode 100644 index 0000000..62822b2 --- /dev/null +++ b/executables/bounded_queue_benchmark.cpp @@ -0,0 +1,82 @@ +#include +#include + +#include +#include +#include +#include + +constexpr size_t PER_THREAD_SIZE = 1000; +constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; +constexpr size_t QUEUE_SIZE = 10000; + +template +static int runQueueMultithreading( + multithreading::structures::bounded_queue::BoundedQueue* queue +) { + const size_t threadsCount = std::thread::hardware_concurrency(); + std::vector threads {}; + threads.reserve(threadsCount); + + for (size_t i = 0; i < threadsCount; ++i) { + threads.emplace_back([&queue] { + for (size_t j = 0; j < PER_THREAD_SIZE; j++) { + queue->try_enqueue(static_cast(j)); + } + }); + } + + for (size_t i = 0; i < PER_THREAD_SIZE * threadsCount; ++i) { + std::future> latestValue = queue->wait_dequeue_async( + std::chrono::milliseconds(DEQUEUE_TIMEOUT_MS) + ); + + const std::optional value = latestValue.get(); + if (value.has_value()) { + std::cout << "Dequeued at iteration " << i + 1 << "(" << value.value() << ")" << '\n'; + } else { + i--; + } + } + + for (size_t i = 0; i < threadsCount; ++i) { + if (threads[i].joinable()) { + threads[i].join(); + } + } + + return 0; +} + +auto main() -> int { + multithreading::utilities::Application benchmarkApplication( + multithreading::utilities::ApplicationInfo{ + .appName="Bounded Queue Benchmark", + .appVersion="1.0.0" + } + ); + + const std::array< + std::unique_ptr>, 1 + > queues { + std::make_unique>(QUEUE_SIZE) + }; + + const std::optional executionResult = benchmarkApplication.SafeStart( + [&]() -> int { + return runQueueMultithreading(queues.at(0).get()); + } + ); + + try { + if (executionResult.has_value()) { + return executionResult.value(); + } + + return -1; + } catch (std::exception& e) { + std::cerr << e.what() << '\n'; + + return -1; + } +} \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/BoundedQueue.h b/multithreading/structures/include/bounded_queue/BoundedQueue.h new file mode 100644 index 0000000..1051df6 --- /dev/null +++ b/multithreading/structures/include/bounded_queue/BoundedQueue.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + + +namespace multithreading::structures::bounded_queue { + + template + class BoundedQueue { + public: + virtual ~BoundedQueue() = default; + + virtual std::optional try_dequeue() = 0; + virtual std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) = 0; + virtual std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) = 0; + virtual bool try_enqueue(const T& value) = 0; + virtual bool try_enqueue(T&& value) = 0; + + [[nodiscard]] virtual bool is_empty(bool isPrecise) const = 0; + [[nodiscard]] virtual bool is_full(bool isPrecise) const = 0; + }; +} // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h b/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h new file mode 100644 index 0000000..313a428 --- /dev/null +++ b/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h @@ -0,0 +1,261 @@ +#pragma once + +#include +#include +#include + +#include "./BoundedQueue.h" + +namespace multithreading::structures::bounded_queue { + + template + struct FGNode { + private: + T value; + FGNode* next_node; + public: + explicit FGNode(const T& value) + : value(value) + , next_node(nullptr) + {} + + explicit FGNode(T&& value) + : value(std::move(value)) + , next_node(nullptr) + {} + + FGNode() + : value(T{}) + , next_node(nullptr) + {} + + FGNode(const T& value, FGNode* nextNode) + : value(value) + , next_node(nextNode) + {} + + void setNext(FGNode* node) { + this->next_node = node; + } + + [[nodiscard]] FGNode* next() const { + return next_node; + } + + [[nodiscard]] const T& get() const { + return value; + } + + [[nodiscard]] T& get() { + return value; + } + }; + + template + class FGLockBoundedQueueImpl { + private: + const size_t size_limit; + FGNode* head; + FGNode* tail; + + std::atomic size_counter; + std::atomic is_shutdown; + std::condition_variable enqueue_condition; + mutable std::mutex tail_mu; + mutable std::mutex head_mu; + + std::optional unsafe_dequeue() { + FGNode* dummyNode = head; + FGNode* firstValuableNode = dummyNode->next(); + + if (firstValuableNode == nullptr) { + return std::nullopt; + } + + std::optional result = std::move(firstValuableNode->get()); + head = firstValuableNode; + delete dummyNode; + + return result; + } + + bool is_operations_allowed() const { + return !is_shutdown.load(std::memory_order_acquire); + } + + bool unsafe_is_empty() const { + return head->next() == nullptr; + } + public: + explicit FGLockBoundedQueueImpl(const size_t sizeLimit) + : size_limit(sizeLimit) + { + head = tail = new FGNode(); + } + + ~FGLockBoundedQueueImpl() { + is_shutdown.store(true, std::memory_order_release); + // Notify everyone that the state changed. They will wake up because now + // operations are not allowed. + enqueue_condition.notify_all(); + + std::lock_guard head_lock(head_mu); + std::lock_guard tail_lock(tail_mu); + while (head != tail) { + FGNode* dummy = head; + head = head->next(); + delete dummy; + } + delete tail; + } + + std::optional try_dequeue() { + // Quick check to decrease amount of mutex-waiting when queue is obviously empty + if (size_counter.load(std::memory_order_relaxed) == 0) { + return std::nullopt; + } + + std::lock_guard lock(head_mu); + if (!is_operations_allowed() || head->next() == nullptr) { + // Queue is empty or shutting down, even though our first counter check succeeded. + // Probably means that the amount of elements were 1, and it decreased while we were + // waiting for mutex unlock (was busy with dequeuing the last element). + return std::nullopt; + } + + auto result = unsafe_dequeue(); + size_counter.fetch_sub(1, std::memory_order_relaxed); + return result; + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) { + // Lock the mutex, wait for new item to be enqueued. Return std::nullopt if + // the time limit exceeded. + std::unique_lock lock(head_mu); + if (!enqueue_condition.wait_for(lock, timeout, [this]() { + return !is_operations_allowed() || !unsafe_is_empty(); + })) { + return std::nullopt; + } + + // Check if we woke up because operations are not allowed. + if (!is_operations_allowed()) { + return std::nullopt; + } + + auto result = unsafe_dequeue(); + if (result.has_value()) { + size_counter.fetch_sub(1, std::memory_order_relaxed); + } + + return result; + } + + bool try_enqueue(FGNode* node) { + // Quick check to decrease amount of mutex-waiting when queue is obviously full + if (size_counter.load(std::memory_order_relaxed) >= size_limit) { + return false; + } + + std::lock_guard lock(tail_mu); + if (!is_operations_allowed()) { + return false; + } + // Acquire lock, ensure queue has space to add element and increment the counter + // if it does. Rollback and return false otherwise. + if (size_counter.fetch_add(1, std::memory_order_relaxed) >= size_limit) { + size_counter.fetch_sub(1, std::memory_order_relaxed); + return false; + } + + tail->setNext(node); + tail = node; + + enqueue_condition.notify_one(); + return true; + } + + bool is_empty() const { + return size_counter.load(std::memory_order_relaxed) == 0; + } + + bool is_empty_precise() const { + std::lock_guard lock(head_mu); + return unsafe_is_empty(); + } + + bool is_full() const { + return size_counter.load(std::memory_order_relaxed) >= size_limit; + } + + bool is_full_precise() const { + std::lock_guard lock(tail_mu); + return is_full(); + } + }; + + template + class FGLockBoundedQueue final : public BoundedQueue { + private: + FGLockBoundedQueueImpl impl; + public: + explicit FGLockBoundedQueue(const size_t sizeLimit) noexcept + : impl(sizeLimit) + {} + + FGLockBoundedQueue(FGLockBoundedQueue&& other) = delete; + FGLockBoundedQueue(const FGLockBoundedQueue& other) = delete; + FGLockBoundedQueue& operator=(const FGLockBoundedQueue& other) = default; + FGLockBoundedQueue& operator=(FGLockBoundedQueue&& other) = delete; + + ~FGLockBoundedQueue() override = default; + + std::optional try_dequeue() override { + return impl.try_dequeue(); + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + return impl.wait_dequeue(timeout); + } + + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) override { + return std::async(std::launch::async, [this, timeout]() { + return this->impl.wait_dequeue(timeout); + }); + } + + bool try_enqueue(const T& value) override { + auto* node = new FGNode(value); + if (!impl.try_enqueue(node)) { + delete node; + return false; + } + + return true; + } + + bool try_enqueue(T&& value) override { + auto* node = new FGNode(std::move(value)); + if (!impl.try_enqueue(node)) { + delete node; + return false; + } + + return true; + } + + [[nodiscard]] bool is_empty(bool isPrecise) const override { + return isPrecise ? impl.is_empty_precise() : impl.is_empty(); + } + + [[nodiscard]] bool is_full(bool isPrecise) const override { + return isPrecise ? impl.is_full_precise() : impl.is_full(); + } + }; +} // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h new file mode 100644 index 0000000..1dc519f --- /dev/null +++ b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include "./BoundedQueue.h" + +namespace multithreading::structures::bounded_queue { + + template + class LockFreeBoundedQueue final : public BoundedQueue { + private: + const size_t size_limit; + public: + explicit LockFreeBoundedQueue(size_t sizeLimit) + : size_limit(sizeLimit) + {} + + LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; + LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; + + ~LockFreeBoundedQueue() override = default; + + std::optional try_dequeue() override { + + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + + } + + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) override { + + } + + bool try_enqueue(const T& value) override { + + } + + bool try_enqueue(T&& value) override { + + } + + + bool is_empty(bool isPrecise) const override { + + } + + bool is_full(bool isPrecise) const override { + + } + }; +} // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h index 79e7c42..b121ea9 100644 --- a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include "./UnboundedQueue.h" @@ -22,6 +22,11 @@ namespace multithreading::structures::unbounded_queue { , nextNode(nullptr) {} + FGNode() + : value(T{}) + , nextNode(nullptr) + {} + [[nodiscard]] T& get() { return value; } @@ -36,8 +41,8 @@ namespace multithreading::structures::unbounded_queue { }; template - class FGLockUnboundedQueue : public UnboundedQueue { - private: + class FGLockUnboundedQueueImpl { + private: mutable std::mutex head_mu; mutable std::mutex tail_mu; @@ -67,32 +72,34 @@ namespace multithreading::structures::unbounded_queue { return result; } + + bool unsafe_is_empty() const { + return head->next() == nullptr; + } public: - FGLockUnboundedQueue() { - head = tail = new FGNode(T{}); + FGLockUnboundedQueueImpl() { + head = tail = new FGNode(); } - ~FGLockUnboundedQueue() { - while (FGLockUnboundedQueue::try_dequeue().has_value()) {} + ~FGLockUnboundedQueueImpl() { + std::lock_guard head_lock(head_mu); + std::lock_guard tail_lock(tail_mu); + while (head != tail) { + FGNode* dummy = head; + head = head->next(); + delete dummy; + } delete tail; } - void enqueue(const T& value) override { - auto* node = new FGNode(value); - std::lock_guard lock(tail_mu); - - unsafe_enqueue(node); - } - - void enqueue(T&& value) override { - auto* node = new FGNode(std::move(value)); + void enqueue(FGNode* node) { std::lock_guard lock(tail_mu); unsafe_enqueue(node); } - std::optional try_dequeue() override { + std::optional try_dequeue() { std::lock_guard lock(head_mu); return unsafe_dequeue(); @@ -100,11 +107,11 @@ namespace multithreading::structures::unbounded_queue { std::optional wait_dequeue( const std::chrono::steady_clock::duration& timeout - ) override { + ) { std::unique_lock lock(head_mu); if (!enqueue_condition.wait_for(lock, timeout, [this]() { - return head->next() != nullptr; + return !this->unsafe_is_empty(); })) { return std::nullopt; } @@ -112,17 +119,56 @@ namespace multithreading::structures::unbounded_queue { return unsafe_dequeue(); } + [[nodiscard]] bool is_empty() const { + std::lock_guard lock(head_mu); + return unsafe_is_empty(); + } + }; + + template + class FGLockUnboundedQueue final : public UnboundedQueue { + private: + FGLockUnboundedQueueImpl impl; + public: + FGLockUnboundedQueue() noexcept = default; + + FGLockUnboundedQueue(FGLockUnboundedQueue&& other) = delete; + FGLockUnboundedQueue(const FGLockUnboundedQueue& other) = delete; + FGLockUnboundedQueue& operator=(const FGLockUnboundedQueue& other) = delete; + FGLockUnboundedQueue& operator=(FGLockUnboundedQueue&& other) = delete; + + ~FGLockUnboundedQueue() override = default; + + void enqueue(const T& value) override { + auto* node = new FGNode(value); + impl.enqueue(node); + } + + void enqueue(T&& value) override { + auto* node = new FGNode(std::move(value)); + impl.enqueue(node); + } + + std::optional try_dequeue() override { + return impl.try_dequeue(); + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + return impl.wait_dequeue(timeout); + } + std::future> wait_dequeue_async( const std::chrono::steady_clock::duration& timeout ) override { return std::async(std::launch::async, [this, timeout]() -> std::optional { - return this->wait_dequeue(timeout); + return impl.wait_dequeue(timeout); }); } [[nodiscard]] bool is_empty() const override { - std::lock_guard lock(head_mu); - return head->next() == nullptr; + return impl.is_empty(); } }; } // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h index 4ebdbda..65bdf68 100644 --- a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "./UnboundedQueue.h" @@ -52,7 +53,7 @@ namespace multithreading::structures::unbounded_queue { constexpr size_t DEFAULT_MAX_ATTEMPTS = 100; template - class LockFreeUnboundedQueue : public UnboundedQueue { + class LockFreeUnboundedQueueImpl { private: size_t maxAlgorithmDepth; @@ -168,8 +169,8 @@ namespace multithreading::structures::unbounded_queue { " constructor to bypass this bottleneck"); } public: - LockFreeUnboundedQueue() - : maxAlgorithmDepth(DEFAULT_MAX_ATTEMPTS) + explicit LockFreeUnboundedQueueImpl(const size_t maxAlgorithmDepth) + : maxAlgorithmDepth(maxAlgorithmDepth) , items_available(0) { auto* dummy = new LockFreeNode(); @@ -177,45 +178,26 @@ namespace multithreading::structures::unbounded_queue { tail.store(dummy, std::memory_order_relaxed); } - LockFreeUnboundedQueue(const LockFreeQueueConfig& config) - : maxAlgorithmDepth(config.maxUpdateDepth) - , items_available(0) - { - auto* dummy = new LockFreeNode(); - head.store(dummy, std::memory_order_relaxed); - tail.store(dummy, std::memory_order_relaxed); - } - - ~LockFreeUnboundedQueue() { - while (LockFreeUnboundedQueue::try_dequeue().has_value()) {} + ~LockFreeUnboundedQueueImpl() { + while (LockFreeUnboundedQueueImpl::try_dequeue().has_value()) {} const LockFreeNode* dummy = head.load(std::memory_order_relaxed); delete dummy; } - void enqueue(const T& value) override { - auto* newNode = new LockFreeNode(value); - enqueue_node(newNode); - - // Tell semaphore we have 1 item released - items_available.release(1); - } - - void enqueue(T&& value) override { - auto* newNode = new LockFreeNode(std::move(value)); - enqueue_node(newNode); - - // Tell semaphore we have 1 item released + void enqueue(LockFreeNode* node) { + enqueue_node(node); + // Tell semaphore we have 1 item enqueued items_available.release(1); } - std::optional try_dequeue() override { + std::optional try_dequeue() { return dequeue_node(); } std::optional wait_dequeue( const std::chrono::steady_clock::duration& timeout - ) override { + ) { const auto deadline = std::chrono::steady_clock::now() + timeout; // We don't check the time equality inside the while loop as it is ID-based and @@ -245,15 +227,7 @@ namespace multithreading::structures::unbounded_queue { return try_dequeue(); } - std::future> wait_dequeue_async( - const std::chrono::steady_clock::duration& timeout - ) override { - return std::async(std::launch::async, [this, timeout]() -> std::optional { - return this->wait_dequeue(timeout); - }); - } - - bool is_empty() const override { + bool is_empty() const { size_t iterator = 0; while (iterator < maxAlgorithmDepth) { @@ -275,4 +249,57 @@ namespace multithreading::structures::unbounded_queue { " constructor to bypass this bottleneck"); } }; + + template + class LockFreeUnboundedQueue final : public UnboundedQueue { + private: + LockFreeUnboundedQueueImpl impl; + public: + explicit LockFreeUnboundedQueue(const LockFreeQueueConfig& config) noexcept + : impl({ config.maxUpdateDepth }) + {} + + LockFreeUnboundedQueue() noexcept + : impl({ DEFAULT_MAX_ATTEMPTS }) + {} + + LockFreeUnboundedQueue(LockFreeUnboundedQueue&& other) = delete; + LockFreeUnboundedQueue(const LockFreeUnboundedQueue& other) = delete; + LockFreeUnboundedQueue& operator=(const LockFreeUnboundedQueue& other) = delete; + LockFreeUnboundedQueue& operator=(LockFreeUnboundedQueue&& other) = delete; + + ~LockFreeUnboundedQueue() override = default; + + void enqueue(const T& value) override { + auto* newNode = new LockFreeNode(value); + impl.enqueue(newNode); + } + + void enqueue(T&& value) override { + auto* newNode = new LockFreeNode(std::move(value)); + impl.enqueue(newNode); + } + + std::optional try_dequeue() override { + return impl.try_dequeue(); + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) override { + return impl.wait_dequeue(timeout); + } + + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout + ) override { + return std::async(std::launch::async, [this, timeout]() -> std::optional { + return this->impl.wait_dequeue(timeout); + }); + } + + [[nodiscard]] bool is_empty() const override { + return impl.is_empty(); + } + }; } // namespace multithreading::structures::unbounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h index 3beff9b..565ef30 100644 --- a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h @@ -1,7 +1,7 @@ #pragma once #include - +#include namespace multithreading::structures::unbounded_queue { diff --git a/multithreading/utilities/include/Application.h b/multithreading/utilities/include/Application.h index 15c4e7b..9f04b75 100644 --- a/multithreading/utilities/include/Application.h +++ b/multithreading/utilities/include/Application.h @@ -26,7 +26,7 @@ namespace multithreading::utilities { std::optional SafeStart(std::function task) noexcept { try { return task(); - } catch (const std::exception& exception) { + } catch (std::exception& exception) { const std::string prefix = "Uncaught exception at " + appInformation.appName; std::cout << prefix << ": " << exception.what() << '\n'; From 437560ea3e80f111c79aaf913ff160bf7e563b4c Mon Sep 17 00:00:00 2001 From: isoldpower Date: Tue, 2 Dec 2025 17:21:32 +0500 Subject: [PATCH 04/19] feat(structures): lock-free bounded queue implementation implementation is using Dmitry Vyukov's MPMC bounded queue algorithm --- .../include/bounded_queue/BoundedQueue.h | 5 + .../bounded_queue/FGLockBoundedQueue.h | 18 +- .../bounded_queue/LockFreeBoundedQueue.h | 253 ++++++++++++++++-- .../unbounded_queue/FGLockUnboundedQueue.h | 15 +- .../unbounded_queue/LockFreeUnboundedQueue.h | 16 +- .../include/unbounded_queue/UnboundedQueue.h | 5 + 6 files changed, 287 insertions(+), 25 deletions(-) diff --git a/multithreading/structures/include/bounded_queue/BoundedQueue.h b/multithreading/structures/include/bounded_queue/BoundedQueue.h index 1051df6..2c3fee4 100644 --- a/multithreading/structures/include/bounded_queue/BoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/BoundedQueue.h @@ -11,6 +11,11 @@ namespace multithreading::structures::bounded_queue { public: virtual ~BoundedQueue() = default; + BoundedQueue(const BoundedQueue&) = delete; + BoundedQueue& operator=(const BoundedQueue&) = delete; + BoundedQueue(BoundedQueue&& other) = delete; + BoundedQueue& operator=(BoundedQueue&& other) = delete; + virtual std::optional try_dequeue() = 0; virtual std::optional wait_dequeue( const std::chrono::steady_clock::duration& timeout diff --git a/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h b/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h index 313a428..833ad05 100644 --- a/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/FGLockBoundedQueue.h @@ -101,14 +101,25 @@ namespace multithreading::structures::bounded_queue { std::lock_guard head_lock(head_mu); std::lock_guard tail_lock(tail_mu); - while (head != tail) { - FGNode* dummy = head; + while (true) { + // Optimisation purpose. If we put the ID-dependant condition in the while loop, + // it will decrease the performance. + if (head == tail) { + break; + } + + const FGNode* dummy = head; head = head->next(); delete dummy; } delete tail; } + FGLockBoundedQueueImpl(const FGLockBoundedQueueImpl& other) = delete; + FGLockBoundedQueueImpl& operator=(const FGLockBoundedQueueImpl& other) = delete; + FGLockBoundedQueueImpl(FGLockBoundedQueueImpl&& other) = delete; + FGLockBoundedQueueImpl& operator=(FGLockBoundedQueueImpl&& other) = delete; + std::optional try_dequeue() { // Quick check to decrease amount of mutex-waiting when queue is obviously empty if (size_counter.load(std::memory_order_relaxed) == 0) { @@ -202,7 +213,8 @@ namespace multithreading::structures::bounded_queue { FGLockBoundedQueueImpl impl; public: explicit FGLockBoundedQueue(const size_t sizeLimit) noexcept - : impl(sizeLimit) + : BoundedQueue() + , impl(sizeLimit) {} FGLockBoundedQueue(FGLockBoundedQueue&& other) = delete; diff --git a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h index 1dc519f..5f7be57 100644 --- a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h @@ -8,52 +8,273 @@ namespace multithreading::structures::bounded_queue { template - class LockFreeBoundedQueue final : public BoundedQueue { + struct SlotValue { private: - const size_t size_limit; + // Generally speaking, the purpose is to reserve the memory of desired size and with + // desired align that type is using, but leave the possibility to not always store the value. + // Modern C++ versions support the std::optional which gives same functionality. + std::aligned_storage_t storage; + + T* get_pointer() { + return reinterpret_cast(&storage); + } + + const T* get_pointer() const { + return reinterpret_cast(&storage); + } + public: + void emplace(const T& value) { + // We get the pointer + new (get_pointer()) T(value); + } + + void emplace(T&& value) { + new (get_pointer()) T(std::move(value)); + } + + void erase() { + (*get_pointer()).~T(); + } + + T* get() { + return get_pointer(); + } + + const T* get() const { + return get_pointer(); + } + + T take() { + T result = std::move(*get_pointer()); + erase(); + return result; + } + }; + + // Sequence has 2 states: + // 1) 'full' (lap_number * capacity) + 1: (i.e. capacity = 10, then 1, 11, 21, 31... indicate 'full') + // 2) 'empty' (lap_number * capacity): (i.e. capacity = 10, then 0, 10, 20, 30... indicate 'empty') + template + struct CircularSlot { + public: + std::atomic sequence; + SlotValue value; + }; + + template + class LockFreeBoundedQueueImpl { + private: + const size_t capacity; + std::vector> data; + + std::counting_semaphore<> items_available; + alignas(64) std::atomic enqueue_pos; + alignas(64) std::atomic dequeue_pos; + public: + explicit LockFreeBoundedQueueImpl(const size_t capacity) + : capacity(capacity) + , data(capacity) + , items_available(0) + , enqueue_pos(0) + , dequeue_pos(0) + { + for (size_t i = 0; i < capacity; ++i) { + data[i].sequence.store(i, std::memory_order_relaxed); + } + } + + LockFreeBoundedQueueImpl(LockFreeBoundedQueueImpl&&) = delete; + LockFreeBoundedQueueImpl(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(LockFreeBoundedQueueImpl&&) = delete; + + ~LockFreeBoundedQueueImpl() { + while (try_dequeue().has_value()) {} + }; + + std::optional try_dequeue() { + while (true) { + size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (sequence == position) { + // Sequence indicates 'empty' state. Probably means that our queue is empty. + return std::nullopt; + } + if (sequence != position + 1) { + // Sequence does not indicate 'Full' state, the slot is not ready to be read + // by consumer. Continue spinning. + continue; + } + + // CAS to ensure that dequeue_pos didn't update while we were proceeding. This way + // we will either claim the position or identify race condition loss and retry. + if (dequeue_pos.compare_exchange_weak( + position, + position + 1, + std::memory_order_relaxed, + std::memory_order_relaxed + )) { + // Position successfully claimed, proceed and return the value. + T result = data[lap_position].value.take(); + data[lap_position].sequence.store(position + capacity, std::memory_order_release); + items_available.release(-1); + + return result; + } + } + } + + std::optional wait_dequeue( + const std::chrono::steady_clock::duration& timeout + ) { + const auto deadline = std::chrono::steady_clock::now() + timeout; + + // We don't check the time equality inside the while loop as it is ID-based and + // affects performance in a bad way. + while (true) { + const auto now_time = std::chrono::steady_clock::now(); + if (now_time >= deadline) { + // Time limit exceeded, break from cycle. + break; + } + + // Wait for time left after previous iterations. + if (items_available.try_acquire_for(deadline - now_time)) { + // If the value added to semaphore - try to dequeue it. It can still fail + // in a racing condition with common .try_dequeue() callers. + if (auto result = try_dequeue(); result.has_value()) { + // Dequeue succeeded - return the result of operation. + return result; + } + } else { + // Time limit exceeded, break from cycle. + break; + } + } + + // Final attempt to dequeue the value. + return try_dequeue(); + } + + bool try_enqueue(T value) { + while (true) { + size_t position = enqueue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (sequence < position) { + // The slot we are trying to address is still on the previous lap, + // which means that it is full and have not been read. It means that the + // queue is full + if (enqueue_pos.load(std::memory_order_acquire) == position) { + // Ensure atomic state didn't change and if it didn't return false. + return false; + } + // Atomic state changed, probably we've got new space to enqueue. + // Retry in next iteration. + continue; + } else if (sequence > position) { + // We lost racing condition and someone has claimed our slot before we did. + // Continue to get fresh enqueue position in next iteration. + continue; + } else { + // Slot is ready and in the correct 'Empty' state. Proceed with CAS operation + // to ensure position didn't change since the beginning of the operation. + if (enqueue_pos.compare_exchange_weak( + position, + position + 1, + std::memory_order_relaxed, + std::memory_order_relaxed + )) { + data[lap_position].value.emplace(std::move(value)); + data[lap_position].sequence.store(position + 1, std::memory_order_release); + items_available.release(1); + return true; + } + } + } + } + + bool is_empty() const { + while (true) { + const size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (position == dequeue_pos.load(std::memory_order_acquire)) { + // If the sequence equals to position, it means that Slot state is 'Empty', which means + // that the queue is empty. + return sequence == position; + } + } + } + + bool is_full() const { + while (true) { + const size_t position = enqueue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (position == enqueue_pos.load(std::memory_order_acquire)) { + // If the sequence is less than position, it means that our sequence is + // still on previous lap and wasn't read. Indicates that queue is full. + return sequence < position; + } + } + } + }; + + template + class LockFreeBoundedQueue : public BoundedQueue { + private: + LockFreeBoundedQueueImpl impl; public: - explicit LockFreeBoundedQueue(size_t sizeLimit) - : size_limit(sizeLimit) + explicit LockFreeBoundedQueue(const size_t capacity) + : BoundedQueue() + , impl(capacity) {} + ~LockFreeBoundedQueue() override = default; + LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; - ~LockFreeBoundedQueue() override = default; - std::optional try_dequeue() override { - + return impl.try_dequeue(); } std::optional wait_dequeue( const std::chrono::steady_clock::duration& timeout ) override { - + return impl.wait_dequeue(timeout); } std::future> wait_dequeue_async( const std::chrono::steady_clock::duration& timeout ) override { - + return std::async(std::launch::async, [this, timeout]() -> auto { + return this->impl.wait_dequeue(timeout); + }); } bool try_enqueue(const T& value) override { - + return impl.try_enqueue(value); } bool try_enqueue(T&& value) override { - + return impl.try_enqueue(std::move(value)); } - - bool is_empty(bool isPrecise) const override { - + [[nodiscard]] bool is_empty(bool) const override { + return impl.is_empty(); } - bool is_full(bool isPrecise) const override { - + [[nodiscard]] bool is_full(bool) const override { + return impl.is_full(); } }; } // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h index b121ea9..4788a6d 100644 --- a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h @@ -85,14 +85,25 @@ namespace multithreading::structures::unbounded_queue { std::lock_guard head_lock(head_mu); std::lock_guard tail_lock(tail_mu); - while (head != tail) { - FGNode* dummy = head; + while (true) { + // Optimisation purpose. If we put the ID-dependant condition in the while loop, + // it will decrease the performance. + if (head == tail) { + break; + } + + const FGNode* dummy = head; head = head->next(); delete dummy; } delete tail; } + FGLockUnboundedQueueImpl(const FGLockUnboundedQueueImpl& other) = delete; + FGLockUnboundedQueueImpl& operator=(const FGLockUnboundedQueueImpl& other) = delete; + FGLockUnboundedQueueImpl(FGLockUnboundedQueueImpl&& other) = delete; + FGLockUnboundedQueueImpl& operator=(FGLockUnboundedQueueImpl&& other) = delete; + void enqueue(FGNode* node) { std::lock_guard lock(tail_mu); diff --git a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h index 65bdf68..fc3af2d 100644 --- a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h @@ -185,6 +185,11 @@ namespace multithreading::structures::unbounded_queue { delete dummy; } + LockFreeUnboundedQueueImpl(const LockFreeUnboundedQueueImpl&) = delete; + LockFreeUnboundedQueueImpl& operator=(const LockFreeUnboundedQueueImpl&) = delete; + LockFreeUnboundedQueueImpl(LockFreeUnboundedQueueImpl&&) = delete; + LockFreeUnboundedQueueImpl& operator=(LockFreeUnboundedQueueImpl&&) = delete; + void enqueue(LockFreeNode* node) { enqueue_node(node); // Tell semaphore we have 1 item enqueued @@ -256,19 +261,22 @@ namespace multithreading::structures::unbounded_queue { LockFreeUnboundedQueueImpl impl; public: explicit LockFreeUnboundedQueue(const LockFreeQueueConfig& config) noexcept - : impl({ config.maxUpdateDepth }) + : UnboundedQueue() + , impl({ config.maxUpdateDepth }) {} LockFreeUnboundedQueue() noexcept - : impl({ DEFAULT_MAX_ATTEMPTS }) + : UnboundedQueue() + , impl(DEFAULT_MAX_ATTEMPTS) {} + ~LockFreeUnboundedQueue() override = default; + LockFreeUnboundedQueue(LockFreeUnboundedQueue&& other) = delete; + LockFreeUnboundedQueue& operator=(LockFreeUnboundedQueue&& other) = delete; LockFreeUnboundedQueue(const LockFreeUnboundedQueue& other) = delete; LockFreeUnboundedQueue& operator=(const LockFreeUnboundedQueue& other) = delete; - LockFreeUnboundedQueue& operator=(LockFreeUnboundedQueue&& other) = delete; - ~LockFreeUnboundedQueue() override = default; void enqueue(const T& value) override { auto* newNode = new LockFreeNode(value); diff --git a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h index 565ef30..5b1421f 100644 --- a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h @@ -10,6 +10,11 @@ namespace multithreading::structures::unbounded_queue { public: virtual ~UnboundedQueue() = default; + UnboundedQueue(const UnboundedQueue&) = delete; + UnboundedQueue& operator=(const UnboundedQueue&) = delete; + UnboundedQueue(UnboundedQueue&&) = delete; + UnboundedQueue& operator=(UnboundedQueue&&) = delete; + virtual std::optional try_dequeue() = 0; virtual std::optional wait_dequeue( const std::chrono::steady_clock::duration& timeout From 87352c96870fe74fe3ae026ab7745e57b02291fd Mon Sep 17 00:00:00 2001 From: isoldpower Date: Sun, 7 Dec 2025 23:50:24 +0500 Subject: [PATCH 05/19] feat: unbounded queue and bounded queue benchmarks --- .clang-tidy | 4 +- executables/CMakeLists.txt | 14 +- .../include/BoundedQueueBenchmark.h | 23 + executables/benchmarks/include/ThreadConfig.h | 13 + .../include/UnboundedQueueBenchmark.h | 23 + .../include/mcmp/BoundedQueueMCMPBenchmark.h | 22 + .../mcmp/UnboundedQueueMCMPBenchmark.h | 22 + .../benchmarks/src/BoundedQueueBenchmark.cpp | 11 + .../src/UnboundedQueueBenchmark.cpp | 11 + .../src/mcmp/BoundedQueueMCMPBenchmark.cpp | 34 ++ .../src/mcmp/UnboundedQueueMCMPBenchmark.cpp | 26 + executables/bounded_queue_benchmark.cpp | 124 ++--- executables/unbounded_queue_benchmark.cpp | 130 +++-- multithreading/CMakeLists.txt | 3 +- multithreading/structures/CMakeLists.txt | 10 +- .../include/bounded_queue/BoundedQueue.h | 1 + .../bounded_queue/LockFreeBoundedQueue.h | 443 ++++++++---------- .../include/linked_list/LinkedList.h | 5 + .../unbounded_queue/FGLockUnboundedQueue.h | 7 +- .../unbounded_queue/LockFreeUnboundedQueue.h | 40 +- .../include/unbounded_queue/UnboundedQueue.h | 1 + multithreading/utilities/CMakeLists.txt | 9 +- .../utilities/include/Application.cpp | 5 - .../utilities/include/Application.h | 37 +- .../include/benchmark/BenchmarkMatrix.h | 19 + .../include/benchmark/BenchmarkMeasurer.h | 27 ++ .../include/benchmark/BenchmarkRunner.h | 17 + .../include/benchmark/MultithreadingTask.h | 30 ++ .../benchmark/ProducerConsumerBenchmark.h | 15 + .../benchmark/mcmp/MCMPBenchmarkRunner.h | 25 + .../include/performance/AlignedField.h | 38 ++ .../utilities/include/placeholder.h | 18 - .../src/benchmark/BenchmarkMeasurer.cpp | 32 ++ .../benchmark/mcmp/MCMPBenchmarkRunner.cpp | 38 ++ multithreading/utilities/src/placeholder.cpp | 13 - 35 files changed, 852 insertions(+), 438 deletions(-) create mode 100644 executables/benchmarks/include/BoundedQueueBenchmark.h create mode 100644 executables/benchmarks/include/ThreadConfig.h create mode 100644 executables/benchmarks/include/UnboundedQueueBenchmark.h create mode 100644 executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h create mode 100644 executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h create mode 100644 executables/benchmarks/src/BoundedQueueBenchmark.cpp create mode 100644 executables/benchmarks/src/UnboundedQueueBenchmark.cpp create mode 100644 executables/benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp create mode 100644 executables/benchmarks/src/mcmp/UnboundedQueueMCMPBenchmark.cpp create mode 100644 multithreading/structures/include/linked_list/LinkedList.h delete mode 100644 multithreading/utilities/include/Application.cpp create mode 100644 multithreading/utilities/include/benchmark/BenchmarkMatrix.h create mode 100644 multithreading/utilities/include/benchmark/BenchmarkMeasurer.h create mode 100644 multithreading/utilities/include/benchmark/BenchmarkRunner.h create mode 100644 multithreading/utilities/include/benchmark/MultithreadingTask.h create mode 100644 multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h create mode 100644 multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h create mode 100644 multithreading/utilities/include/performance/AlignedField.h delete mode 100644 multithreading/utilities/include/placeholder.h create mode 100644 multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp create mode 100644 multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp delete mode 100644 multithreading/utilities/src/placeholder.cpp diff --git a/.clang-tidy b/.clang-tidy index 3e44f1a..748c212 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,5 +1,5 @@ -Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace' -WarningsAsErrors: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace' +Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace,-portability-template-virtual-member-function,-altera-id-dependent-backward-branch' +WarningsAsErrors: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace,-portability-template-virtual-member-function,-altera-id-dependent-backward-branch' HeaderFilterRegex: '^(?!.*/build/).*' FormatStyle: google CheckOptions: diff --git a/executables/CMakeLists.txt b/executables/CMakeLists.txt index 9f55ffb..c91d887 100644 --- a/executables/CMakeLists.txt +++ b/executables/CMakeLists.txt @@ -1,5 +1,11 @@ -add_executable(unbounded_queue_benchmark unbounded_queue_benchmark.cpp) -add_executable(bounded_queue_benchmark bounded_queue_benchmark.cpp) +add_executable(unbounded_queue_benchmark + unbounded_queue_benchmark.cpp + benchmarks/src/UnboundedQueueBenchmark.cpp + benchmarks/src/mcmp/UnboundedQueueMCMPBenchmark.cpp) +add_executable(bounded_queue_benchmark + bounded_queue_benchmark.cpp + benchmarks/src/BoundedQueueBenchmark.cpp + benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp) -target_link_libraries(unbounded_queue_benchmark PRIVATE utilities cxx_setup) -target_link_libraries(bounded_queue_benchmark PRIVATE utilities cxx_setup) \ No newline at end of file +target_link_libraries(unbounded_queue_benchmark PRIVATE utilities structures cxx_setup) +target_link_libraries(bounded_queue_benchmark PRIVATE utilities structures cxx_setup) \ No newline at end of file diff --git a/executables/benchmarks/include/BoundedQueueBenchmark.h b/executables/benchmarks/include/BoundedQueueBenchmark.h new file mode 100644 index 0000000..b2f446e --- /dev/null +++ b/executables/benchmarks/include/BoundedQueueBenchmark.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include + + +namespace executables::benchmarks { + + class BoundedQueueBenchmark { + protected: + std::shared_ptr< + multithreading::structures::bounded_queue::BoundedQueue + > benchmark_queue; + public: + explicit BoundedQueueBenchmark( + const std::shared_ptr< + multithreading::structures::bounded_queue::BoundedQueue> &queue + ); + }; + + +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/include/ThreadConfig.h b/executables/benchmarks/include/ThreadConfig.h new file mode 100644 index 0000000..d51fca4 --- /dev/null +++ b/executables/benchmarks/include/ThreadConfig.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace executables::benchmarks { + + constexpr size_t THREAD_SIZE = 100; + constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; + constexpr size_t QUEUE_SIZE = 10000; + const size_t THREADS_COUNT = std::thread::hardware_concurrency(); +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/include/UnboundedQueueBenchmark.h b/executables/benchmarks/include/UnboundedQueueBenchmark.h new file mode 100644 index 0000000..3aabf11 --- /dev/null +++ b/executables/benchmarks/include/UnboundedQueueBenchmark.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include + + +namespace executables::benchmarks { + + class UnboundedQueueBenchmark { + protected: + std::shared_ptr< + multithreading::structures::unbounded_queue::UnboundedQueue + > benchmark_queue; + public: + explicit UnboundedQueueBenchmark( + const std::shared_ptr< + multithreading::structures::unbounded_queue::UnboundedQueue> &queue + ); + }; + + +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h b/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h new file mode 100644 index 0000000..11cfdc3 --- /dev/null +++ b/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include "../BoundedQueueBenchmark.h" + +namespace executables::benchmarks::mcmp { + + class BoundedQueueMCMPBenchmark final + : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + , BoundedQueueBenchmark + { + public: + explicit BoundedQueueMCMPBenchmark( + const std::shared_ptr< + multithreading::structures::bounded_queue::BoundedQueue> &queue + ); + + void producer_routine(size_t threadSize) override; + void consumer_routine(size_t threadSize) override; + }; +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h b/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h new file mode 100644 index 0000000..360660c --- /dev/null +++ b/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include "../UnboundedQueueBenchmark.h" + +namespace executables::benchmarks::mcmp { + + class UnboundedQueueMCMPBenchmark final + : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + , UnboundedQueueBenchmark + { + public: + explicit UnboundedQueueMCMPBenchmark( + const std::shared_ptr< + multithreading::structures::unbounded_queue::UnboundedQueue> &queue + ); + + void producer_routine(size_t threadSize) override; + void consumer_routine(size_t threadSize) override; + }; +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/benchmarks/src/BoundedQueueBenchmark.cpp b/executables/benchmarks/src/BoundedQueueBenchmark.cpp new file mode 100644 index 0000000..929cf07 --- /dev/null +++ b/executables/benchmarks/src/BoundedQueueBenchmark.cpp @@ -0,0 +1,11 @@ +#include "../include/BoundedQueueBenchmark.h" + +namespace executables::benchmarks { + + BoundedQueueBenchmark::BoundedQueueBenchmark( + const std::shared_ptr< + multithreading::structures::bounded_queue::BoundedQueue> &queue + ) + : benchmark_queue(queue) + {} +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/src/UnboundedQueueBenchmark.cpp b/executables/benchmarks/src/UnboundedQueueBenchmark.cpp new file mode 100644 index 0000000..856a314 --- /dev/null +++ b/executables/benchmarks/src/UnboundedQueueBenchmark.cpp @@ -0,0 +1,11 @@ +#include "../include/UnboundedQueueBenchmark.h" + +namespace executables::benchmarks { + + UnboundedQueueBenchmark::UnboundedQueueBenchmark( + const std::shared_ptr< + multithreading::structures::unbounded_queue::UnboundedQueue> &queue + ) + : benchmark_queue(queue) + {} +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp b/executables/benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp new file mode 100644 index 0000000..da57a36 --- /dev/null +++ b/executables/benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp @@ -0,0 +1,34 @@ +#include "../../include/mcmp/BoundedQueueMCMPBenchmark.h" + +namespace executables::benchmarks::mcmp { + + constexpr size_t MAX_CONSUME_RETRIES = 100; + + BoundedQueueMCMPBenchmark::BoundedQueueMCMPBenchmark( + const std::shared_ptr< + multithreading::structures::bounded_queue::BoundedQueue> &queue + ) + : BoundedQueueBenchmark(queue) + {} + + void BoundedQueueMCMPBenchmark::producer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + while (!benchmark_queue->try_enqueue(static_cast(j))) { + // Keep trying until we have space + } + } + } + + void BoundedQueueMCMPBenchmark::consumer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + size_t retries = 0; + while (!benchmark_queue->try_dequeue().has_value()) { + if (++retries > MAX_CONSUME_RETRIES) { + std::this_thread::yield(); + retries = 0; + } + } + } + } + +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/benchmarks/src/mcmp/UnboundedQueueMCMPBenchmark.cpp b/executables/benchmarks/src/mcmp/UnboundedQueueMCMPBenchmark.cpp new file mode 100644 index 0000000..7d8f669 --- /dev/null +++ b/executables/benchmarks/src/mcmp/UnboundedQueueMCMPBenchmark.cpp @@ -0,0 +1,26 @@ +#include "../../include/mcmp/UnboundedQueueMCMPBenchmark.h" + +namespace executables::benchmarks::mcmp { + + UnboundedQueueMCMPBenchmark::UnboundedQueueMCMPBenchmark( + const std::shared_ptr< + multithreading::structures::unbounded_queue::UnboundedQueue> &queue + ) + : UnboundedQueueBenchmark(queue) + {} + + void UnboundedQueueMCMPBenchmark::producer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + benchmark_queue->enqueue(static_cast(j)); + } + } + + void UnboundedQueueMCMPBenchmark::consumer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + while (!benchmark_queue->try_dequeue().has_value()) { + // Keep trying until we get a value + } + } + } + +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index 62822b2..adb61e7 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -1,82 +1,90 @@ -#include -#include - +#include #include #include -#include #include +#include +#include +#include +#include -constexpr size_t PER_THREAD_SIZE = 1000; -constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; -constexpr size_t QUEUE_SIZE = 10000; +#include -template -static int runQueueMultithreading( - multithreading::structures::bounded_queue::BoundedQueue* queue -) { - const size_t threadsCount = std::thread::hardware_concurrency(); - std::vector threads {}; - threads.reserve(threadsCount); +#include "./benchmarks/include/ThreadConfig.h" +#include "./benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h" - for (size_t i = 0; i < threadsCount; ++i) { - threads.emplace_back([&queue] { - for (size_t j = 0; j < PER_THREAD_SIZE; j++) { - queue->try_enqueue(static_cast(j)); - } - }); - } +using multithreading::structures::bounded_queue::FGLockBoundedQueue; +using multithreading::structures::bounded_queue::BoundedQueue; +using multithreading::structures::bounded_queue::FGLockBoundedQueue; +using multithreading::structures::bounded_queue::LockFreeBoundedQueue; - for (size_t i = 0; i < PER_THREAD_SIZE * threadsCount; ++i) { - std::future> latestValue = queue->wait_dequeue_async( - std::chrono::milliseconds(DEQUEUE_TIMEOUT_MS) - ); +using multithreading::utilities::benchmark::BenchmarkTask; +using multithreading::utilities::benchmark::ProducerConsumerBenchmark; +using multithreading::utilities::benchmark::BenchmarkMeasurer; +using multithreading::utilities::benchmark::BenchmarkRunner; +using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; - const std::optional value = latestValue.get(); - if (value.has_value()) { - std::cout << "Dequeued at iteration " << i + 1 << "(" << value.value() << ")" << '\n'; - } else { - i--; - } - } +using executables::benchmarks::mcmp::BoundedQueueMCMPBenchmark; +using executables::benchmarks::THREADS_COUNT; +using executables::benchmarks::THREAD_SIZE; +using executables::benchmarks::QUEUE_SIZE; + + +namespace executables { + template + static void benchmarkApplication(const std::array>, N>& queues) { + const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, + .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + }; + const BenchmarkMeasurer matrixMeasurer(matrix); - for (size_t i = 0; i < threadsCount; ++i) { - if (threads[i].joinable()) { - threads[i].join(); + for (const auto &queue : queues) { + std::shared_ptr const benchmark = + std::make_shared(queue.structure); + std::shared_ptr const runner = + std::make_shared(benchmark); + + std::cout << queue.title << "\n"; + const auto results = matrixMeasurer.measure_benchmark(runner); + for (const auto &[threads_count, thread_size, execution_time] : results) { + std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; + std::cout << "\tExecution time: " << execution_time.count() << "ns\n"; + } + std::cout << '\n'; } } - - return 0; -} +} // namespace executables auto main() -> int { - multithreading::utilities::Application benchmarkApplication( - multithreading::utilities::ApplicationInfo{ - .appName="Bounded Queue Benchmark", - .appVersion="1.0.0" + multithreading::utilities::Application benchmarkApplication( + multithreading::utilities::ApplicationInfo{ + .appName="Unbounded Queue Benchmark", + .appVersion="1.0.0", + .beforeTask = std::nullopt, + .afterTask = std::nullopt } ); - - const std::array< - std::unique_ptr>, 1 - > queues { - std::make_unique>(QUEUE_SIZE) + const std::array queues { + BenchmarkTask>( + std::make_shared>(QUEUE_SIZE), + "Lock-free Lock Queue Benchmark" + ), + BenchmarkTask>( + std::make_shared>(QUEUE_SIZE), + "Fine-Grained Lock Queue Benchmark" + ), }; - const std::optional executionResult = benchmarkApplication.SafeStart( - [&]() -> int { - return runQueueMultithreading(queues.at(0).get()); - } - ); - try { - if (executionResult.has_value()) { - return executionResult.value(); - } + const std::optional executionResult = benchmarkApplication.SafeStart([&]() { + executables::benchmarkApplication(queues); + return 1; + }); - return -1; + return executionResult.has_value() ? executionResult.value() : -1; } catch (std::exception& e) { std::cerr << e.what() << '\n'; return -1; } -} \ No newline at end of file +} diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index add4d1f..05ede54 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -1,94 +1,92 @@ -#include -#include - #include #include #include #include +#include +#include +#include +#include -template -static int runQueueMultithreading( - multithreading::structures::unbounded_queue::UnboundedQueue* queue -) { - constexpr size_t PER_THREAD_SIZE = 1000; - constexpr size_t DEQUEUE_TIMEOUT_MS = 1000; +#include - const size_t threadsCount = std::thread::hardware_concurrency(); - std::vector threads {}; - threads.reserve(threadsCount); +#include "./benchmarks/include/ThreadConfig.h" +#include "./benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h" - for (size_t i = 0; i < threadsCount; ++i) { - threads.emplace_back([&queue] { - for (size_t j = 0; j < PER_THREAD_SIZE; j++) { - queue->enqueue(static_cast(j)); - } - }); - } +using multithreading::structures::unbounded_queue::FGLockUnboundedQueue; +using multithreading::structures::unbounded_queue::UnboundedQueue; +using multithreading::structures::unbounded_queue::FGLockUnboundedQueue; +using multithreading::structures::unbounded_queue::LockFreeUnboundedQueue; +using multithreading::structures::unbounded_queue::LockFreeQueueConfig; - for (size_t i = 0; i < PER_THREAD_SIZE * threadsCount; ++i) { - std::future> latestValue = queue->wait_dequeue_async( - std::chrono::milliseconds(DEQUEUE_TIMEOUT_MS) - ); +using multithreading::utilities::benchmark::BenchmarkTask; +using multithreading::utilities::benchmark::ProducerConsumerBenchmark; +using multithreading::utilities::benchmark::BenchmarkMeasurer; +using multithreading::utilities::benchmark::BenchmarkRunner; +using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; - const std::optional value = latestValue.get(); - if (value.has_value()) { - std::cout << "Dequeued at iteration " << i + 1 << "(" << value.value() << ")" << '\n'; - } else { - i--; - } - } +using executables::benchmarks::mcmp::UnboundedQueueMCMPBenchmark; +using executables::benchmarks::THREADS_COUNT; +using executables::benchmarks::THREAD_SIZE; + + +namespace executables { + template + static void benchmarkApplication(const std::array>, N>& queues) { + const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, + .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + }; + const BenchmarkMeasurer matrixMeasurer(matrix); - for (size_t i = 0; i < threadsCount; ++i) { - if (threads[i].joinable()) { - threads[i].join(); + for (const auto &queue : queues) { + std::shared_ptr const benchmark = + std::make_shared(queue.structure); + std::shared_ptr const runner = + std::make_shared(benchmark); + + std::cout << queue.title << "\n"; + const auto results = matrixMeasurer.measure_benchmark(runner); + for (const auto &[threads_count, thread_size, execution_time] : results) { + std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; + std::cout << "\tExecution time: " << execution_time.count() << "mics\n"; + } + std::cout << '\n'; } } - - return 0; -} +} // namespace executables auto main() -> int { - multithreading::utilities::Application benchmarkApplication( - multithreading::utilities::ApplicationInfo{ + multithreading::utilities::Application benchmarkApplication( + multithreading::utilities::ApplicationInfo{ .appName="Unbounded Queue Benchmark", - .appVersion="1.0.0" + .appVersion="1.0.0", + .beforeTask = std::nullopt, + .afterTask = std::nullopt } ); - - const std::array< - std::unique_ptr>, 2 - > queues { - std::make_unique>(), - std::make_unique>( - multithreading::structures::unbounded_queue::LockFreeQueueConfig{ + const std::array queues { + BenchmarkTask>( + std::make_shared>(LockFreeQueueConfig{ .maxUpdateDepth = 10000 - } + }), + "Lock-free Queue Benchmark" + ), + BenchmarkTask>( + std::make_shared>(), + "Fine-Grained Lock Queue Benchmark" ) }; - const std::optional executionResult = benchmarkApplication.SafeStart( - [&]() -> int { - return runQueueMultithreading(queues.at(0).get()); - } - ); - const std::optional executionResult2 = benchmarkApplication.SafeStart( - [&]() -> int { - return runQueueMultithreading(queues.at(1).get()); - } - ); - try { - if (executionResult.has_value()) { - return executionResult.value(); - } - if (executionResult2.has_value()) { - return executionResult2.value(); - } + const std::optional executionResult = benchmarkApplication.SafeStart([&]() { + executables::benchmarkApplication(queues); + return 1; + }); - return -1; + return executionResult.has_value() ? executionResult.value() : -1; } catch (std::exception& e) { std::cerr << e.what() << '\n'; return -1; } -} \ No newline at end of file +} diff --git a/multithreading/CMakeLists.txt b/multithreading/CMakeLists.txt index a6f9881..4a42d9b 100644 --- a/multithreading/CMakeLists.txt +++ b/multithreading/CMakeLists.txt @@ -1 +1,2 @@ -add_subdirectory(utilities) \ No newline at end of file +add_subdirectory(utilities) +add_subdirectory(structures) \ No newline at end of file diff --git a/multithreading/structures/CMakeLists.txt b/multithreading/structures/CMakeLists.txt index 79b7a12..be011c4 100644 --- a/multithreading/structures/CMakeLists.txt +++ b/multithreading/structures/CMakeLists.txt @@ -1,12 +1,12 @@ -add_library(utilities +add_library(structures src/placeholder.cpp) if (BUILD_TESTING) - add_executable(tests tests/main.cpp) - target_link_libraries(tests PRIVATE GTest::gtest_main) + add_executable(structures_tests tests/main.cpp) + target_link_libraries(structures_tests PRIVATE GTest::gtest_main) include(GoogleTest) - gtest_discover_tests(tests) + gtest_discover_tests(structures_tests) endif() include_directories(include) -target_link_libraries(utilities PUBLIC cxx_setup) \ No newline at end of file +target_link_libraries(structures PUBLIC cxx_setup) \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/BoundedQueue.h b/multithreading/structures/include/bounded_queue/BoundedQueue.h index 2c3fee4..24f0a24 100644 --- a/multithreading/structures/include/bounded_queue/BoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/BoundedQueue.h @@ -9,6 +9,7 @@ namespace multithreading::structures::bounded_queue { template class BoundedQueue { public: + BoundedQueue() = default; virtual ~BoundedQueue() = default; BoundedQueue(const BoundedQueue&) = delete; diff --git a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h index 5f7be57..df74415 100644 --- a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include @@ -7,274 +9,243 @@ namespace multithreading::structures::bounded_queue { - template - struct SlotValue { - private: - // Generally speaking, the purpose is to reserve the memory of desired size and with - // desired align that type is using, but leave the possibility to not always store the value. - // Modern C++ versions support the std::optional which gives same functionality. - std::aligned_storage_t storage; - - T* get_pointer() { - return reinterpret_cast(&storage); - } - - const T* get_pointer() const { - return reinterpret_cast(&storage); - } - public: - void emplace(const T& value) { - // We get the pointer - new (get_pointer()) T(value); - } - - void emplace(T&& value) { - new (get_pointer()) T(std::move(value)); +template +struct SlotValue { +private: + // Generally speaking, the purpose is to reserve the memory of desired size and with + // desired align that type is using, but leave the possibility to not always store the value. + // Modern C++ versions support the std::optional which gives same functionality. + std::aligned_storage_t storage; + + T* get_pointer() { return reinterpret_cast(&storage); } + + const T* get_pointer() const { return reinterpret_cast(&storage); } + +public: + void emplace(const T& value) { + // We get the pointer + new (get_pointer()) T(value); + } + + void emplace(T&& value) { new (get_pointer()) T(std::move(value)); } + + void erase() { (*get_pointer()).~T(); } + + T* get() { return get_pointer(); } + + const T* get() const { return get_pointer(); } + + T take() { + T result = std::move(*get_pointer()); + erase(); + return result; + } +}; + +// Sequence has 2 states: +// 1) 'full' (lap_number * capacity) + 1: (i.e. capacity = 10, then 1, 11, 21, 31... indicate +// 'full') 2) 'empty' (lap_number * capacity): (i.e. capacity = 10, then 0, 10, 20, 30... indicate +// 'empty') +template +struct CircularSlot { +public: + std::atomic sequence; + SlotValue value; +}; + +template +class LockFreeBoundedQueueImpl { +private: + const size_t capacity; + std::vector> data; + + std::counting_semaphore<> items_available; + alignas(64) utilities::performance::AlignedField> enqueue_pos; + alignas(64) std::atomic dequeue_pos; + +public: + explicit LockFreeBoundedQueueImpl(const size_t capacity) + : capacity(capacity) + , data(capacity) + , items_available(0) + , enqueue_pos(0) + , dequeue_pos(0) + { + for (size_t i = 0; i < capacity; ++i) { + data[i].sequence.store(i, std::memory_order_relaxed); + } + } + + LockFreeBoundedQueueImpl(LockFreeBoundedQueueImpl&&) = delete; + LockFreeBoundedQueueImpl(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(LockFreeBoundedQueueImpl&&) = delete; + + ~LockFreeBoundedQueueImpl() { + while (try_dequeue().has_value()) { } + }; - void erase() { - (*get_pointer()).~T(); - } + std::optional try_dequeue() { + while (true) { + size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - T* get() { - return get_pointer(); - } + if (sequence == position) { + // Sequence indicates 'empty' state. Probably means that our queue is empty. + return std::nullopt; + } + if (sequence != position + 1) { + // Sequence does not indicate 'Full' state, the slot is not ready to be read + // by consumer. Continue spinning. + continue; + } - const T* get() const { - return get_pointer(); + // CAS to ensure that dequeue_pos didn't update while we were proceeding. This way + // we will either claim the position or identify race condition loss and retry. + if (dequeue_pos.compare_exchange_weak( + position, + position + 1, + std::memory_order_acquire, + std::memory_order_relaxed) + ) { + // Position successfully claimed, proceed and return the value. + T result = data[lap_position].value.take(); + data[lap_position].sequence.store(position + capacity, std::memory_order_release); + return result; + } } + } - T take() { - T result = std::move(*get_pointer()); - erase(); - return result; - } - }; + std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) { + const auto deadline = std::chrono::steady_clock::now() + timeout; - // Sequence has 2 states: - // 1) 'full' (lap_number * capacity) + 1: (i.e. capacity = 10, then 1, 11, 21, 31... indicate 'full') - // 2) 'empty' (lap_number * capacity): (i.e. capacity = 10, then 0, 10, 20, 30... indicate 'empty') - template - struct CircularSlot { - public: - std::atomic sequence; - SlotValue value; - }; + // We don't check the time equality inside the while loop as it is ID-based and + // affects performance in a bad way. + while (true) { + const auto now_time = std::chrono::steady_clock::now(); + if (now_time >= deadline) { + // Time limit exceeded, break from cycle. + break; + } - template - class LockFreeBoundedQueueImpl { - private: - const size_t capacity; - std::vector> data; - - std::counting_semaphore<> items_available; - alignas(64) std::atomic enqueue_pos; - alignas(64) std::atomic dequeue_pos; - public: - explicit LockFreeBoundedQueueImpl(const size_t capacity) - : capacity(capacity) - , data(capacity) - , items_available(0) - , enqueue_pos(0) - , dequeue_pos(0) - { - for (size_t i = 0; i < capacity; ++i) { - data[i].sequence.store(i, std::memory_order_relaxed); + // Wait for time left after previous iterations. + if (items_available.try_acquire_for(deadline - now_time)) { + // If the value added to semaphore - try to dequeue it. It can still fail + // in a racing condition with common .try_dequeue() callers. + if (auto result = try_dequeue(); result.has_value()) { + // Dequeue succeeded - return the result of operation. + return result; + } + } else { + // Time limit exceeded, break from cycle. + break; } } - LockFreeBoundedQueueImpl(LockFreeBoundedQueueImpl&&) = delete; - LockFreeBoundedQueueImpl(const LockFreeBoundedQueueImpl&) = delete; - LockFreeBoundedQueueImpl& operator=(const LockFreeBoundedQueueImpl&) = delete; - LockFreeBoundedQueueImpl& operator=(LockFreeBoundedQueueImpl&&) = delete; - - ~LockFreeBoundedQueueImpl() { - while (try_dequeue().has_value()) {} - }; + // Final attempt to dequeue the value. + return try_dequeue(); + } - std::optional try_dequeue() { - while (true) { - size_t position = dequeue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + bool try_enqueue(T value) { + while (true) { + size_t position = enqueue_pos->load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - if (sequence == position) { - // Sequence indicates 'empty' state. Probably means that our queue is empty. - return std::nullopt; + if (sequence < position) { + // The slot we are trying to address is still on the previous lap, + // which means that it is full and have not been read. It means that the + // queue is full + if (enqueue_pos->load(std::memory_order_acquire) == position) { + // Ensure atomic state didn't change and if it didn't return false. + return false; } - if (sequence != position + 1) { - // Sequence does not indicate 'Full' state, the slot is not ready to be read - // by consumer. Continue spinning. - continue; - } - - // CAS to ensure that dequeue_pos didn't update while we were proceeding. This way - // we will either claim the position or identify race condition loss and retry. - if (dequeue_pos.compare_exchange_weak( - position, - position + 1, - std::memory_order_relaxed, - std::memory_order_relaxed - )) { - // Position successfully claimed, proceed and return the value. - T result = data[lap_position].value.take(); - data[lap_position].sequence.store(position + capacity, std::memory_order_release); - items_available.release(-1); - - return result; + // Atomic state changed, probably we've got new space to enqueue. + // Retry in next iteration. + continue; + } else if (sequence > position) { + // We lost racing condition and someone has claimed our slot before we did. + // Continue to get fresh enqueue position in next iteration. + continue; + } else { + // Slot is ready and in the correct 'Empty' state. Proceed with CAS operation + // to ensure position didn't change since the beginning of the operation. + if (enqueue_pos->compare_exchange_weak(position, position + 1, + std::memory_order_relaxed, + std::memory_order_relaxed)) { + data[lap_position].value.emplace(std::move(value)); + data[lap_position].sequence.store(position + 1, std::memory_order_release); + items_available.release(1); + return true; } } } + } - std::optional wait_dequeue( - const std::chrono::steady_clock::duration& timeout - ) { - const auto deadline = std::chrono::steady_clock::now() + timeout; - - // We don't check the time equality inside the while loop as it is ID-based and - // affects performance in a bad way. - while (true) { - const auto now_time = std::chrono::steady_clock::now(); - if (now_time >= deadline) { - // Time limit exceeded, break from cycle. - break; - } + bool is_empty() const { + while (true) { + const size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - // Wait for time left after previous iterations. - if (items_available.try_acquire_for(deadline - now_time)) { - // If the value added to semaphore - try to dequeue it. It can still fail - // in a racing condition with common .try_dequeue() callers. - if (auto result = try_dequeue(); result.has_value()) { - // Dequeue succeeded - return the result of operation. - return result; - } - } else { - // Time limit exceeded, break from cycle. - break; - } + if (position == dequeue_pos.load(std::memory_order_acquire)) { + // If the sequence equals to position, it means that Slot state is 'Empty', which + // means that the queue is empty. + return sequence == position; } - - // Final attempt to dequeue the value. - return try_dequeue(); } + } - bool try_enqueue(T value) { - while (true) { - size_t position = enqueue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - - if (sequence < position) { - // The slot we are trying to address is still on the previous lap, - // which means that it is full and have not been read. It means that the - // queue is full - if (enqueue_pos.load(std::memory_order_acquire) == position) { - // Ensure atomic state didn't change and if it didn't return false. - return false; - } - // Atomic state changed, probably we've got new space to enqueue. - // Retry in next iteration. - continue; - } else if (sequence > position) { - // We lost racing condition and someone has claimed our slot before we did. - // Continue to get fresh enqueue position in next iteration. - continue; - } else { - // Slot is ready and in the correct 'Empty' state. Proceed with CAS operation - // to ensure position didn't change since the beginning of the operation. - if (enqueue_pos.compare_exchange_weak( - position, - position + 1, - std::memory_order_relaxed, - std::memory_order_relaxed - )) { - data[lap_position].value.emplace(std::move(value)); - data[lap_position].sequence.store(position + 1, std::memory_order_release); - items_available.release(1); - return true; - } - } + bool is_full() const { + while (true) { + const size_t position = enqueue_pos->load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (position == enqueue_pos->load(std::memory_order_acquire)) { + // If the sequence is less than position, it means that our sequence is + // still on previous lap and wasn't read. Indicates that queue is full. + return sequence < position; } } + } +}; - bool is_empty() const { - while (true) { - const size_t position = dequeue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); +template +class LockFreeBoundedQueue : public BoundedQueue { +private: + LockFreeBoundedQueueImpl impl; - if (position == dequeue_pos.load(std::memory_order_acquire)) { - // If the sequence equals to position, it means that Slot state is 'Empty', which means - // that the queue is empty. - return sequence == position; - } - } - } +public: + explicit LockFreeBoundedQueue(const size_t capacity) : BoundedQueue(), impl(capacity) {} - bool is_full() const { - while (true) { - const size_t position = enqueue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + ~LockFreeBoundedQueue() override = default; - if (position == enqueue_pos.load(std::memory_order_acquire)) { - // If the sequence is less than position, it means that our sequence is - // still on previous lap and wasn't read. Indicates that queue is full. - return sequence < position; - } - } - } - }; + LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; + LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; - template - class LockFreeBoundedQueue : public BoundedQueue { - private: - LockFreeBoundedQueueImpl impl; - public: - explicit LockFreeBoundedQueue(const size_t capacity) - : BoundedQueue() - , impl(capacity) - {} - - ~LockFreeBoundedQueue() override = default; - - LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; - LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; - LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; - LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; - - std::optional try_dequeue() override { - return impl.try_dequeue(); - } + std::optional try_dequeue() override { return impl.try_dequeue(); } - std::optional wait_dequeue( - const std::chrono::steady_clock::duration& timeout - ) override { - return impl.wait_dequeue(timeout); - } + std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) override { + return impl.wait_dequeue(timeout); + } - std::future> wait_dequeue_async( - const std::chrono::steady_clock::duration& timeout - ) override { - return std::async(std::launch::async, [this, timeout]() -> auto { - return this->impl.wait_dequeue(timeout); - }); - } + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout) override { + return std::async(std::launch::async, + [this, timeout]() -> auto { return this->impl.wait_dequeue(timeout); }); + } - bool try_enqueue(const T& value) override { - return impl.try_enqueue(value); - } + bool try_enqueue(const T& value) override { return impl.try_enqueue(value); } - bool try_enqueue(T&& value) override { - return impl.try_enqueue(std::move(value)); - } + bool try_enqueue(T&& value) override { return impl.try_enqueue(std::move(value)); } - [[nodiscard]] bool is_empty(bool) const override { - return impl.is_empty(); - } + [[nodiscard]] bool is_empty(bool) const override { return impl.is_empty(); } - [[nodiscard]] bool is_full(bool) const override { - return impl.is_full(); - } - }; -} // namespace multithreading::structures::bounded_queue \ No newline at end of file + [[nodiscard]] bool is_full(bool) const override { return impl.is_full(); } +}; +} // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/LinkedList.h b/multithreading/structures/include/linked_list/LinkedList.h new file mode 100644 index 0000000..5539a46 --- /dev/null +++ b/multithreading/structures/include/linked_list/LinkedList.h @@ -0,0 +1,5 @@ +#pragma once + +namespace multithreading::structures::linked_list { + +} \ No newline at end of file diff --git a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h index 4788a6d..f9903bd 100644 --- a/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/FGLockUnboundedQueue.h @@ -42,7 +42,7 @@ namespace multithreading::structures::unbounded_queue { template class FGLockUnboundedQueueImpl { - private: + private: mutable std::mutex head_mu; mutable std::mutex tail_mu; @@ -141,7 +141,10 @@ namespace multithreading::structures::unbounded_queue { private: FGLockUnboundedQueueImpl impl; public: - FGLockUnboundedQueue() noexcept = default; + FGLockUnboundedQueue() noexcept + : UnboundedQueue() + , impl() + {} FGLockUnboundedQueue(FGLockUnboundedQueue&& other) = delete; FGLockUnboundedQueue(const FGLockUnboundedQueue& other) = delete; diff --git a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h index fc3af2d..078ce87 100644 --- a/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/LockFreeUnboundedQueue.h @@ -1,10 +1,10 @@ #pragma once -#include #include +#include #include "./UnboundedQueue.h" - +#include "multithreading/utilities/include/performance/AlignedField.h" namespace multithreading::structures::unbounded_queue { @@ -13,7 +13,7 @@ namespace multithreading::structures::unbounded_queue { }; template - struct LockFreeNode { + struct alignas(64) LockFreeNode { private: T value; std::atomic nextNode; @@ -57,8 +57,8 @@ namespace multithreading::structures::unbounded_queue { private: size_t maxAlgorithmDepth; - std::atomic*> head; - std::atomic*> tail; + utilities::performance::AlignedField*>> head; + utilities::performance::AlignedField*>> tail; std::counting_semaphore<> items_available; void enqueue_node(LockFreeNode* newNode) { @@ -66,12 +66,12 @@ namespace multithreading::structures::unbounded_queue { while (iterator < maxAlgorithmDepth) { iterator++; - LockFreeNode* last = tail.load(std::memory_order_acquire); + LockFreeNode* last = tail->load(std::memory_order_acquire); LockFreeNode* next = last->next(); // If the tail updated between reads, then we want to // retry reaching real tail in the next iteration - if (last == tail.load()) { + if (last == tail->load(std::memory_order_acquire)) { if (next == nullptr) { LockFreeNode* expected = nullptr; if (last->nextAtomic().compare_exchange_weak( @@ -81,7 +81,7 @@ namespace multithreading::structures::unbounded_queue { std::memory_order_acquire )) { // Write successful, try to update the tail - tail.compare_exchange_weak( + tail->compare_exchange_weak( last, newNode, std::memory_order_release, @@ -92,7 +92,7 @@ namespace multithreading::structures::unbounded_queue { } else { // At this point our tail didn't change, but the reference to next // got updated on tail. So we want to help our tail to be up-to-date. - tail.compare_exchange_weak( + tail->compare_exchange_weak( last, next, std::memory_order_release, @@ -113,13 +113,13 @@ namespace multithreading::structures::unbounded_queue { while (iterator < maxAlgorithmDepth) { iterator++; - LockFreeNode* first = head.load(std::memory_order_acquire); - LockFreeNode* last = tail.load(std::memory_order_acquire); + LockFreeNode* first = head->load(std::memory_order_acquire); + LockFreeNode* last = tail->load(std::memory_order_acquire); LockFreeNode* firstValuable = first->next(); // Check if data is valid and didn't change between reads. If it did - just continue // and redo the action in the next iteration. - if (first == head.load(std::memory_order_acquire)) { + if (first == head->load(std::memory_order_acquire)) { if (first == last) { if (firstValuable == nullptr) { // If our head equals to tail - it means that the queue is empty @@ -128,7 +128,7 @@ namespace multithreading::structures::unbounded_queue { } else { // But if the firstValuable node is not nullptr - it means that tail // wasn't updated, so its lagging behind. Help advance it - tail.compare_exchange_weak( + tail->compare_exchange_weak( last, firstValuable, std::memory_order_release, @@ -140,7 +140,7 @@ namespace multithreading::structures::unbounded_queue { } else { T value = firstValuable->get(); - if (head.compare_exchange_weak( + if (head->compare_exchange_weak( first, firstValuable, std::memory_order_release, @@ -174,14 +174,14 @@ namespace multithreading::structures::unbounded_queue { , items_available(0) { auto* dummy = new LockFreeNode(); - head.store(dummy, std::memory_order_relaxed); - tail.store(dummy, std::memory_order_relaxed); + head->store(dummy, std::memory_order_relaxed); + tail->store(dummy, std::memory_order_relaxed); } ~LockFreeUnboundedQueueImpl() { while (LockFreeUnboundedQueueImpl::try_dequeue().has_value()) {} - const LockFreeNode* dummy = head.load(std::memory_order_relaxed); + const LockFreeNode* dummy = head->load(std::memory_order_relaxed); delete dummy; } @@ -238,12 +238,12 @@ namespace multithreading::structures::unbounded_queue { while (iterator < maxAlgorithmDepth) { iterator++; - LockFreeNode* first = head.load(std::memory_order_acquire); - LockFreeNode* last = tail.load(std::memory_order_acquire); + LockFreeNode* first = head->load(std::memory_order_acquire); + LockFreeNode* last = tail->load(std::memory_order_acquire); // Memory consistency check. We want to make sure that the first didn't change // while we were reading last. - if (first == head.load(std::memory_order_acquire)) { + if (first == head->load(std::memory_order_acquire)) { return first == last; } } diff --git a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h index 5b1421f..f47d392 100644 --- a/multithreading/structures/include/unbounded_queue/UnboundedQueue.h +++ b/multithreading/structures/include/unbounded_queue/UnboundedQueue.h @@ -9,6 +9,7 @@ namespace multithreading::structures::unbounded_queue { class UnboundedQueue { public: virtual ~UnboundedQueue() = default; + UnboundedQueue() = default; UnboundedQueue(const UnboundedQueue&) = delete; UnboundedQueue& operator=(const UnboundedQueue&) = delete; diff --git a/multithreading/utilities/CMakeLists.txt b/multithreading/utilities/CMakeLists.txt index 79b7a12..ea54d1b 100644 --- a/multithreading/utilities/CMakeLists.txt +++ b/multithreading/utilities/CMakeLists.txt @@ -1,11 +1,12 @@ add_library(utilities - src/placeholder.cpp) + src/benchmark/BenchmarkMeasurer.cpp + src/benchmark/mcmp/MCMPBenchmarkRunner.cpp) if (BUILD_TESTING) - add_executable(tests tests/main.cpp) - target_link_libraries(tests PRIVATE GTest::gtest_main) + add_executable(utilities_tests tests/main.cpp) + target_link_libraries(utilities_tests PRIVATE GTest::gtest_main) include(GoogleTest) - gtest_discover_tests(tests) + gtest_discover_tests(utilities_tests) endif() include_directories(include) diff --git a/multithreading/utilities/include/Application.cpp b/multithreading/utilities/include/Application.cpp deleted file mode 100644 index cc91bfe..0000000 --- a/multithreading/utilities/include/Application.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// -// Created by Nikita Lapin on 23/11/2025. -// - -#include "Application.h" diff --git a/multithreading/utilities/include/Application.h b/multithreading/utilities/include/Application.h index 9f04b75..f99eb3e 100644 --- a/multithreading/utilities/include/Application.h +++ b/multithreading/utilities/include/Application.h @@ -5,27 +5,43 @@ #include #include #include +#include namespace multithreading::utilities { + template struct alignas(64) ApplicationInfo { public: std::string appName; std::string appVersion; + std::optional> beforeTask; + std::optional> afterTask; }; template class Application { private: - ApplicationInfo appInformation; + ApplicationInfo appInformation; public: - explicit Application(ApplicationInfo information) + explicit Application(ApplicationInfo information) : appInformation(std::move(information)) {} std::optional SafeStart(std::function task) noexcept { try { - return task(); + const auto beforeTaskDecorator = appInformation.beforeTask; + if (beforeTaskDecorator.has_value()) { + beforeTaskDecorator.value()(); + } + + T taskResult = task(); + + const auto afterTaskDecorator = appInformation.afterTask; + if (afterTaskDecorator.has_value()) { + afterTaskDecorator.value()(taskResult); + } + + return 0; } catch (std::exception& exception) { const std::string prefix = "Uncaught exception at " + appInformation.appName; std::cout << prefix << ": " << exception.what() << '\n'; @@ -33,6 +49,19 @@ namespace multithreading::utilities { return std::nullopt; } } - }; + template + std::array, N> SafeStartQueue( + std::array, N> task + ) noexcept { + std::array, N> executionResults{}; + + for (size_t i = 0; i < N; i++) { + std::optional executionResult = this->SafeStart(task.at(i)); + executionResults.at(i) = executionResult; + } + + return executionResults; + } + }; } // namespace multithreading::utilities diff --git a/multithreading/utilities/include/benchmark/BenchmarkMatrix.h b/multithreading/utilities/include/benchmark/BenchmarkMatrix.h new file mode 100644 index 0000000..f2951c9 --- /dev/null +++ b/multithreading/utilities/include/benchmark/BenchmarkMatrix.h @@ -0,0 +1,19 @@ +#pragma once + +#include + + +namespace multithreading::utilities::benchmark { + + struct alignas(64) BenchmarkMatrixDefinition { + public: + std::vector per_thread_sizes; + std::vector threads_count; + }; + + struct alignas(16) BenchmarkMatrixItem { + public: + size_t threads_count; + size_t thread_size; + }; +} // namespace multithreading::utilities::benchmark diff --git a/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h b/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h new file mode 100644 index 0000000..5822e67 --- /dev/null +++ b/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h @@ -0,0 +1,27 @@ +#pragma once + +#include "./BenchmarkMatrix.h" +#include "./BenchmarkRunner.h" + + +namespace multithreading::utilities::benchmark { + + struct alignas(64) BenchmarkResult { + size_t threads_count; + size_t thread_size; + std::chrono::high_resolution_clock::duration execution_time; + }; + + class BenchmarkMeasurer { + private: + BenchmarkMatrixDefinition benchmark_matrix; + public: + explicit BenchmarkMeasurer(BenchmarkMatrixDefinition matrix) + : benchmark_matrix(std::move(matrix)) + {} + + std::vector measure_benchmark( + const std::shared_ptr& benchmark_runner + ) const; + }; +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/BenchmarkRunner.h b/multithreading/utilities/include/benchmark/BenchmarkRunner.h new file mode 100644 index 0000000..12f3ba2 --- /dev/null +++ b/multithreading/utilities/include/benchmark/BenchmarkRunner.h @@ -0,0 +1,17 @@ +#pragma once + +#include "./BenchmarkMatrix.h" + + +namespace multithreading::utilities::benchmark { + + class BenchmarkRunner { + public: + virtual ~BenchmarkRunner() = default; + BenchmarkRunner() = default; + + virtual void run_benchmark_with( + const BenchmarkMatrixItem& item + ) = 0; + }; +} // namespace multithreading::utilities::benchmark diff --git a/multithreading/utilities/include/benchmark/MultithreadingTask.h b/multithreading/utilities/include/benchmark/MultithreadingTask.h new file mode 100644 index 0000000..dee1722 --- /dev/null +++ b/multithreading/utilities/include/benchmark/MultithreadingTask.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + + +namespace multithreading::utilities::benchmark { + + template + struct alignas(64) BenchmarkTask { + std::string title; + std::shared_ptr structure; + + BenchmarkTask(std::string title, std::shared_ptr structure) + : title(std::move(title)) + , structure(structure) + {} + + BenchmarkTask(std::shared_ptr structure, std::string title) + : title(std::move(title)) + , structure(structure) + {} + + BenchmarkTask(const BenchmarkTask&) = delete; + BenchmarkTask& operator=(const BenchmarkTask&) = delete; + + BenchmarkTask(BenchmarkTask&&) noexcept = default; + BenchmarkTask& operator=(BenchmarkTask&&) noexcept = default; + }; +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h b/multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h new file mode 100644 index 0000000..af55ae7 --- /dev/null +++ b/multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h @@ -0,0 +1,15 @@ +#pragma once + +#include + + +namespace multithreading::utilities::benchmark { + + class ProducerConsumerBenchmark { + public: + virtual ~ProducerConsumerBenchmark() = default; + + virtual void producer_routine(std::size_t threadSize) = 0; + virtual void consumer_routine(std::size_t threadSize) = 0; + }; +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h b/multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h new file mode 100644 index 0000000..eaafb0d --- /dev/null +++ b/multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h @@ -0,0 +1,25 @@ +#pragma once + +#include "../BenchmarkMatrix.h" +#include "../ProducerConsumerBenchmark.h" +#include "../BenchmarkRunner.h" +#include + + +namespace multithreading::utilities::benchmark::mcmp { + + class MCMPBenchmarkRunner : public BenchmarkRunner { + private: + std::shared_ptr benchmark; + public: + explicit MCMPBenchmarkRunner( + const std::shared_ptr& benchmark + ); + + ~MCMPBenchmarkRunner() override = default; + + void run_benchmark_with( + const BenchmarkMatrixItem& item + ) override; + }; +} // namespace multithreading::utilities::benchmark::mcmp diff --git a/multithreading/utilities/include/performance/AlignedField.h b/multithreading/utilities/include/performance/AlignedField.h new file mode 100644 index 0000000..e2c271b --- /dev/null +++ b/multithreading/utilities/include/performance/AlignedField.h @@ -0,0 +1,38 @@ +#pragma once + +#include + + +namespace multithreading::utilities::performance { + + template + struct AlignedField { + alignas(NAlignas) T value; + + static constexpr size_t padding_size = (NAlignas - (sizeof(T) % NAlignas)) % NAlignas; + std::array padding; + + AlignedField() = default; + template + explicit AlignedField(std::in_place_t, Args&&... args) + : value(std::forward(args)...) + {} + + template < + typename U, + typename = std::enable_if_t, AlignedField>> + > + explicit AlignedField(U&& u) + : value(std::forward(u)) + {} + + T& operator*() { return value; } + const T& operator*() const { return value; } + + T* operator->() { return &value; } + const T* operator->() const { return &value; } + + operator T&() { return value; } + operator const T&() const { return value; } + }; +} // namespace multithreading::utilities::performance diff --git a/multithreading/utilities/include/placeholder.h b/multithreading/utilities/include/placeholder.h deleted file mode 100644 index 94f648a..0000000 --- a/multithreading/utilities/include/placeholder.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - - -namespace multithreading::utilities { - class Placeholder { - public: - Placeholder() = default; - Placeholder(const Placeholder&) = delete; - Placeholder& operator=(const Placeholder&) = delete; - Placeholder(Placeholder&&) = delete; - Placeholder& operator=(Placeholder&&) = delete; - ~Placeholder() = default; - - static void SayHello(const std::string& name); - }; -} // namespace multithreading::utilities \ No newline at end of file diff --git a/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp b/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp new file mode 100644 index 0000000..66a0666 --- /dev/null +++ b/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp @@ -0,0 +1,32 @@ +#include "../../include/benchmark/BenchmarkMeasurer.h" + + +namespace multithreading::utilities::benchmark { + + std::vector BenchmarkMeasurer::measure_benchmark( + const std::shared_ptr &benchmark_runner + ) const { + std::vector benchmark_results; + + for (const size_t threads_count : benchmark_matrix.threads_count) { + for (const size_t thread_size : benchmark_matrix.per_thread_sizes) { + const auto start_time = std::chrono::high_resolution_clock::now(); + benchmark_runner->run_benchmark_with({ + .threads_count = threads_count, + .thread_size = thread_size + }); + const auto end_time = std::chrono::high_resolution_clock::now(); + + benchmark_results.emplace_back(BenchmarkResult{ + .threads_count = threads_count, + .thread_size = thread_size, + .execution_time = std::chrono::duration_cast( + end_time - start_time + ) + }); + } + } + + return benchmark_results; + } +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp b/multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp new file mode 100644 index 0000000..5ed5313 --- /dev/null +++ b/multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp @@ -0,0 +1,38 @@ +#include "../../../include/benchmark/mcmp/MCMPBenchmarkRunner.h" + +#include + + +namespace multithreading::utilities::benchmark::mcmp { + + MCMPBenchmarkRunner::MCMPBenchmarkRunner( + const std::shared_ptr& benchmark + ) + : benchmark(benchmark) + {} + + void MCMPBenchmarkRunner::run_benchmark_with( + const BenchmarkMatrixItem& item + ) { + const auto half_threads = static_cast(floor(item.threads_count / 2)); + std::vector threads {}; + threads.reserve(item.threads_count); + + for (size_t i = 0; i < half_threads; ++i) { + threads.emplace_back([&]() { + benchmark->producer_routine(item.thread_size); + }); + } + for (size_t j = half_threads; j < item.threads_count; ++j) { + threads.emplace_back([&]() { + benchmark->consumer_routine(item.thread_size); + }); + } + + for (auto& thread : threads) { + if (thread.joinable()) { + thread.join(); + } + } + } +} // namespace multithreading::utilities::benchmark::mcmp \ No newline at end of file diff --git a/multithreading/utilities/src/placeholder.cpp b/multithreading/utilities/src/placeholder.cpp deleted file mode 100644 index c3bcf01..0000000 --- a/multithreading/utilities/src/placeholder.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "../include/placeholder.h" - -#include -#include -#include - - -namespace multithreading::utilities { - - void Placeholder::SayHello(const std::string &name) { - std::cout << "Hello, world! from " << name << '\n'; - } -} // namespace multithreading::utilities \ No newline at end of file From 6e616f7ebb8a1246f945df9f28f467459c2dae81 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Mon, 12 Jan 2026 22:08:56 -0600 Subject: [PATCH 06/19] feat: fine-grained lock linked list structure implementation --- .../include/linked_list/FGLockLinkedList.h | 413 ++++++++++++++++++ .../include/linked_list/LinkedList.h | 33 +- 2 files changed, 445 insertions(+), 1 deletion(-) create mode 100644 multithreading/structures/include/linked_list/FGLockLinkedList.h diff --git a/multithreading/structures/include/linked_list/FGLockLinkedList.h b/multithreading/structures/include/linked_list/FGLockLinkedList.h new file mode 100644 index 0000000..f683760 --- /dev/null +++ b/multithreading/structures/include/linked_list/FGLockLinkedList.h @@ -0,0 +1,413 @@ +#pragma once + +#include + +#include "./LinkedList.h" + +namespace multithreading::structures::linked_list { + + template + struct alignas(64) FGLockNode : public LinkedListNode { + private: + std::mutex node_mutex; + FGLockNode* next_node; + public: + explicit FGLockNode(const T& value) + : LinkedListNode(value) + , next_node(nullptr) + {} + + explicit FGLockNode(T&& value) + : LinkedListNode(std::move(value)) + , next_node(nullptr) + {} + + FGLockNode() + : LinkedListNode(T{}) + , next_node(nullptr) + {} + + [[nodiscard]] T value() { + return this->node_value; + } + + [[nodiscard]] FGLockNode* next() { + return this->next_node; + } + + void set_next(FGLockNode* node) { + this->next_node = node; + } + + void operate() { + node_mutex.lock(); + } + + void dispose() { + node_mutex.unlock(); + } + }; + + template + class FGLockLinkedListImpl { + private: + FGLockNode* head; + FGLockNode* tail; + + std::mutex tail_mutex; + std::mutex count_mutex; + size_t count {0}; + + void increment_count() { + std::lock_guard lock(count_mutex); + ++count; + } + + void decrement_count() { + std::lock_guard lock(count_mutex); + --count; + } + public: + FGLockLinkedListImpl() + : head(new FGLockNode()) + , tail(head) + {} + + ~FGLockLinkedListImpl() { + while (pop_front().has_value()) {} + delete head; + } + + void push_front(T item) { + FGLockNode* new_node = new FGLockNode(item); + + head->operate(); + FGLockNode* current_head = head->next(); + new_node->set_next(current_head); + head->set_next(new_node); + + if (current_head == nullptr) { + std::lock_guard lock(tail_mutex); + tail = new_node; + } + + head->dispose(); + increment_count(); + } + + void push_back(T item) { + FGLockNode* new_node = new FGLockNode(item); + + // Update last node's contents with internal lock. + tail->operate(); + tail->set_next(new_node); + tail->dispose(); + + // Safely update the tail reference with mutex lock, then release the lock. + { + std::lock_guard lock(tail_mutex); + tail = new_node; + } + // Increase the current size of the list. + increment_count(); + } + + LinkedListNode* push_at(T item, size_t index) { + size_t current_index = 0; + FGLockNode* iterator_node = head; + FGLockNode* iterator_next = nullptr; + + // Traverse up the queue to find the element at specified index. + iterator_node->operate(); + while (current_index < index) { + // We slide up the list: lock the current element first. + iterator_next = iterator_node->next(); + if (iterator_next == nullptr) { + // If the list has came to its end and we didn't find the desired index, + // then we return nullptr. Happens when index > queue size. + iterator_node->dispose(); + return nullptr; + } + + // Perform the slide up: lock the next element, unlock the current + // (as it now becomes 'previous'). + iterator_next->operate(); + iterator_node->dispose(); + iterator_node = iterator_next; + current_index++; + } + + // Create a new node and update all the references with proper mutex locking. + FGLockNode* new_node = new FGLockNode(item); + new_node->set_next(iterator_node->next()); + iterator_node->set_next(new_node); + // If the element we push is the last node, then update the tail reference. + if (iterator_node == tail) { + std::lock_guard lock(tail_mutex); + tail = new_node; + } + // Update the counter to be up-to-date with actual list size. + increment_count(); + iterator_node->dispose(); + return new_node; + } + + std::optional pop_front() { + head->operate(); + // Access the first real element, bypassing dummy node. + FGLockNode* current_head = head->next(); + // If the list is empty simply skip the processing. + if (current_head == nullptr) { + head->dispose(); + return std::nullopt; + } + + // Lock the first element to safely delete it from the list. + current_head->operate(); + head->set_next(current_head->next()); + // If the node deleted was the last one (only one node in the list), then we + // need to update the tail reference to be up-to-date. + if (current_head == tail) { + std::lock_guard lock(tail_mutex); + tail = head; + } + // Actually delete the node that is now out of the list. Unlock before freeing memory. + head->dispose(); + T result = current_head->value(); + current_head->dispose(); + delete current_head; + + // Update the counter after successful deletion. Return the snapshot of node's value. + decrement_count(); + return std::optional(result); + } + + std::optional pop_back() { + // Initial state to begin iterating. + FGLockNode* iterator_node = head; + FGLockNode* iterator_next = nullptr; + + iterator_node->operate(); + iterator_next = iterator_node->next(); + + // Check if we have anything to pop from the list. + if (iterator_next == nullptr) { + // If the list is empty, unlock the dummy. + iterator_node->dispose(); + return std::nullopt; + } + + // Find the second-to-last node to update the reference. + iterator_next->operate(); + while (iterator_next->next() != nullptr) { + iterator_node->dispose(); + iterator_node = iterator_next; + iterator_next = iterator_node->next(); + iterator_next->operate(); + } + + iterator_node->set_next(nullptr); + // Delete the last node (iterator_next) and update the pre-last (iterator_node) + // and tail references. + { + std::lock_guard lock(tail_mutex); + tail = iterator_node; + } + T result = iterator_next->value(); + iterator_next->dispose(); + iterator_node->dispose(); + delete iterator_next; + + decrement_count(); + return std::optional(result); + } + + std::optional pop_at(size_t index) { + // Initial state to begin iterating. + FGLockNode* iterator_node = head; + FGLockNode* iterator_next = nullptr; + + iterator_node->operate(); + iterator_next = iterator_node->next(); + + // If the list is empty, skip all further processing. + if (iterator_next == nullptr) { + iterator_node->dispose(); + return std::nullopt; + } + + // Find the node at passed index (iterator_next), previous node (iterator_node), + // and hold the locks on them to safely modify further. + iterator_next->operate(); + for (size_t i = 0; i < index; i++) { + FGLockNode* next_node = iterator_node->next(); + if (next_node == nullptr) { + iterator_next->dispose(); + iterator_node->dispose(); + return std::nullopt; + } + + next_node->operate(); + iterator_node->dispose(); + iterator_node = iterator_next; + iterator_next = next_node; + } + + // Update the reference at previous node. Lock is already on hold after traverse. + T result = iterator_next->value(); + iterator_node->set_next(iterator_next->next()); + // Safely update the tail reference if required. + if (iterator_next == tail) { + std::lock_guard lock(tail_mutex); + tail = iterator_node; + } + + // Release locks, delete the node that is not in the list anymore. + iterator_node->dispose(); + iterator_next->dispose(); + delete iterator_next; + + decrement_count(); + return std::optional(result); + } + + size_t size() { + std::lock_guard lock(count_mutex); + return count; + } + + bool empty() { + std::lock_guard lock(count_mutex); + return count == 0; + } + + bool contains(T item) { + // Initial state to begin iterating. + FGLockNode* iterator_node = head; + FGLockNode* iterator_next = nullptr; + + iterator_node->operate(); + iterator_next = iterator_node->next(); + + // Traverse the list to find the desired node. Return if found. + while (iterator_next != nullptr) { + iterator_next->operate(); + + if (iterator_next->value() == item) { + // If found, free the nodes and return true. + iterator_next->dispose(); + iterator_node->dispose(); + return true; + } + + iterator_node->dispose(); + iterator_node = iterator_next; + iterator_next = iterator_node->iterator_next(); + } + + // Free the last node and return false. + iterator_node->dispose(); + return false; + } + + LinkedListNode* find(const T& item) { + // Initial state to begin iterating. + FGLockNode* iterator_node = head; + FGLockNode* iterator_next = nullptr; + + iterator_node->operate(); + iterator_next = iterator_node->next(); + + // Traverse the list to find the desired node. Return if found. + while (iterator_next != nullptr) { + iterator_next->operate(); + + if (iterator_next->value() == item) { + // If found, free the nodes and return the found node. + iterator_next->dispose(); + iterator_node->dispose(); + return iterator_next; + } + + iterator_node->dispose(); + iterator_node = iterator_next; + iterator_next = iterator_node->iterator_next(); + } + + // Free the last node and return false. + iterator_node->dispose(); + return nullptr; + } + }; + + template + class FGLockLinkedList final : public LinkedList { + private: + std::unique_ptr> impl; + public: + FGLockLinkedList() : impl(std::make_unique>()) {} + FGLockLinkedList(const FGLockLinkedList &other) = delete; + FGLockLinkedList(FGLockLinkedList &&other) = delete; + + FGLockLinkedList& operator=(const FGLockLinkedList &other) = delete; + FGLockLinkedList& operator=(const FGLockLinkedList &&other) = delete; + + ~FGLockLinkedList() override = default; + + void push_front(T item) override { + impl->push_front(item); + } + void push_front(T&& item) override { + impl->push_front(std::move(item)); + } + void push_front(const T& item) override { + impl->push_front(item); + } + + void push_back(T item) override { + impl->push_back(item); + } + void push_back(T&& item) override { + impl->push_back(std::move(item)); + } + void push_back(const T& item) override { + impl->push_back(item); + } + + LinkedListNode* push_at(T item, size_t index) override { + return impl->push_at(item, index); + } + LinkedListNode* push_at(T&& item, size_t index) override { + return impl->push_at(std::move(item), index); + } + LinkedListNode* push_at(const T& item, size_t index) override { + return impl->push_at(item, index); + } + + std::optional pop_front() override { + return impl->pop_front(); + } + std::optional pop_back() override { + return impl->pop_back(); + } + std::optional pop_at(size_t index) override { + return impl->pop_at(index); + } + + bool empty() override { + return impl->empty(); + } + + size_t size() override { + return impl->size(); + } + + bool contains(T value) override { + return impl->contains(value); + } + + LinkedListNode* find(T value) override { + return impl->find(value); + } + }; +} // namespace multithreading::structures::linked_list \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/LinkedList.h b/multithreading/structures/include/linked_list/LinkedList.h index 5539a46..41fe9f9 100644 --- a/multithreading/structures/include/linked_list/LinkedList.h +++ b/multithreading/structures/include/linked_list/LinkedList.h @@ -1,5 +1,36 @@ #pragma once +#include + + namespace multithreading::structures::linked_list { -} \ No newline at end of file + template + struct LinkedListNode { + protected: + T node_value; + + explicit LinkedListNode(T value) + : node_value(value) + {} + }; + + template + class LinkedList { + public: + virtual ~LinkedList() = 0; + + virtual void push_front(T item) = 0; + virtual void push_back(T item) = 0; + virtual LinkedListNode* push_at(T item, size_t index) = 0; + + virtual std::optional pop_front() = 0; + virtual std::optional pop_back() = 0; + virtual std::optional pop_at(size_t index) = 0; + + virtual bool empty() = 0; + virtual size_t size() = 0; + virtual bool contains(T value) = 0; + virtual LinkedListNode* find(T value) = 0; + }; +} // namespace multithreading::structures::linked_list \ No newline at end of file From caa4fdefdfd56ce1b6654a665ac49a8afa92fd12 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Thu, 29 Jan 2026 00:44:58 -0600 Subject: [PATCH 07/19] feat: lock-free mcmp linked list implementation --- .../include/linked_list/LockFreeLinkedList.h | 545 ++++++++++++++++++ 1 file changed, 545 insertions(+) create mode 100644 multithreading/structures/include/linked_list/LockFreeLinkedList.h diff --git a/multithreading/structures/include/linked_list/LockFreeLinkedList.h b/multithreading/structures/include/linked_list/LockFreeLinkedList.h new file mode 100644 index 0000000..157b658 --- /dev/null +++ b/multithreading/structures/include/linked_list/LockFreeLinkedList.h @@ -0,0 +1,545 @@ +#pragma once + +#include "./LinkedList.h" + +namespace multithreading::structures::linked_list { + + template + struct alignas(8) LockFreeNode { + private: + std::atomic*> next_node; + T node_value; + + static constexpr uintptr_t MARK_BIT = 0x1; + + static bool is_marked(LockFreeNode* node) { + return (reinterpret_cast(node) & MARK_BIT) != 0; + } + + static LockFreeNode* unmark_node(LockFreeNode* node) { + return reinterpret_cast*>( + reinterpret_cast(node & ~MARK_BIT) + ); + } + + static LockFreeNode* mark_node(LockFreeNode* node) { + return reinterpret_cast*>( + reinterpret_cast(node) | MARK_BIT + ); + } + public: + LockFreeNode() + : next_node(nullptr) + , node_value(T{}) + {} + + explicit LockFreeNode(T value) + : next_node(nullptr) + , node_value(value) + {} + + [[nodiscard]] T value() { + return this->node_value; + } + }; + + template + class LockFreeLinkedListImpl { + private: + LockFreeNode* head; + std::atomic*> tail; + + void try_help_advance(LockFreeNode* node, LockFreeNode* next_node) { + if (node == nullptr) { + return; + } + + // Unmark the nodes for safety reasons. + LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); + LockFreeNode* after_next = unmarked_next->next_node.load(std::memory_order_acquire); + LockFreeNode* unmarked_after_next = LockFreeNode::unmark_node(after_next); + + // Help advance (physical delete) the next_node, then skip. + node->next_node.compare_exchange_weak( + next_node, + unmarked_after_next, + std::memory_order_release, + std::memory_order_acquire); + // Ignore if we fail. + } + + LockFreeNode* traverse_to(const size_t index) { + size_t iterator = 0; + LockFreeNode* current_node = head; + + // Traverse to the specified index. + while (iterator < index) { + LockFreeNode* next_node = current_node->next_node.load(std::memory_order_acquire); + if (next_node == nullptr) { + // Reached the end of the list and did not reach the passed index. + // Argument is incorrect, return. + return nullptr; + } + + if (LockFreeNode::is_marked(next_node)) { + this->try_help_advance(current_node, next_node); + continue; + } else { + // The state is consistent. Increase the iterator to proceed to next. + current_node = LockFreeNode::unmark_node(next_node); + iterator++; + } + } + + return current_node; + } + + LockFreeNode* traverse_to_second_to_last() { + LockFreeNode* current_node = head; + LockFreeNode* next_node = current_node->next_node.load(std::memory_order_acquire); + LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); + + // Traverse to the specified index. + while (unmarked_next != nullptr && unmarked_next->next_node.load(std::memory_order_acquire) != nullptr) { + if (LockFreeNode::is_marked(next_node)) { + this->try_help_advance(current_node, next_node); + } else { + // The next node is not marked. Update the outer scope to move forward. + current_node = LockFreeNode::unmark_node(next_node); + } + + next_node = current_node->next_node.load(std::memory_order_acquire); + unmarked_next = LockFreeNode::unmark_node(next_node); + } + + return current_node == head ? nullptr : current_node; + } + public: + LockFreeLinkedListImpl() + : head(new LockFreeNode()) + , tail(head) + {} + + ~LockFreeLinkedListImpl() { + while (pop_front().has_value()) {} + delete head; + } + + void push_front(T item) { + LockFreeNode* new_node = new LockFreeNode(item); + + while (true) { + // Get the state of the current head and unmark it to point to a valid address. + LockFreeNode* current_head = head->next_node.load(std::memory_order_acquire); + LockFreeNode* unmarked = LockFreeNode::unmark_node(current_head); + new_node->next_node.store(unmarked, std::memory_order_relaxed); + + // If the head did not change, then we insert the new node with CAS operation. + // If we did - continue to next iteration until success. + if (head->next_node.compare_exchange_weak( + current_head, + new_node, + std::memory_order_release, + std::memory_order_acquire + )) { + return; + } + } + } + + void push_back(T item) { + LockFreeNode* new_node = new LockFreeNode(item); + + while (true) { + LockFreeNode* current_tail = tail.load(std::memory_order_acquire); + LockFreeNode* after_tail = current_tail->next_node.load(std::memory_order_acquire); + + if (current_tail != tail.load(std::memory_order_acquire)) { + continue; + } + if (LockFreeNode::is_marked(after_tail)) { + this->try_help_advance(current_tail, after_tail); + continue; + } + + // Our tail did not change, it is consistent. + if (after_tail == nullptr) { + // The desired behavior, we are NOT in the middle of other thread's write. + if (current_tail->next_node.compare_exchange_weak( + after_tail, + new_node, + std::memory_order_release, + std::memory_order_acquire + )) { + // Successful binding of new node to the list. Now only tail + // reference update is required. + tail.compare_exchange_weak( + current_tail, + new_node, + std::memory_order_release, + std::memory_order_relaxed); + + return; + } else { + // The update failed. Just proceed to the next iteration to retry. + continue; + } + } else { + // Other thread is currently writing to the tail but the tail reference + // laggs behind. Try helping in advancing the tail to the actual node. + tail.compare_exchange_weak( + current_tail, + after_tail, + std::memory_order_release, + std::memory_order_relaxed); + } + } + } + + LinkedListNode* push_at(T item, const size_t index) { + LockFreeNode* new_node = new LockFreeNode(item); + + while (true) { + LockFreeNode* current_node = traverse_to(index); + // If we are out-of-bounds in traverse then simply delete the new node and return. + if (current_node == nullptr) { + delete new_node; + return nullptr; + } + + LockFreeNode* next_node = current_node->next_node.load(std::memory_order_acquire); + // Traverse successful, insert the node between current and current->next_node. + if (LockFreeNode::is_marked(next_node)) { + this->try_help_advance(current_node, next_node); + continue; + } else { + LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); + new_node->next_node.store(unmarked_next, std::memory_order_relaxed); + + if (current_node->next_node.compare_exchange_weak( + next_node, + new_node, + std::memory_order_release, + std::memory_order_acquire + )) { + if (unmarked_next == nullptr) { + tail.compare_exchange_weak( + current_node, + new_node, + std::memory_order_release, + std::memory_order_relaxed); + } + + return new_node; + } else { + // Our current_node changed somewhere in the process. + // Retry on next iteration. + continue; + } + } + } + } + + std::optional pop_front() { + while (true) { + LockFreeNode* current_head = head->next_node.load(std::memory_order_acquire); + if (LockFreeNode::is_marked(current_head)) { + // The next is marked, help advance the head's next reference and continue + // in the next iteration. + this->try_help_advance(head, current_head); + continue; + } else if (current_head == nullptr) { + return std::nullopt; + } else { + // The next is not marked, move to deletion. + T node_value = current_head->value(); + LockFreeNode* next_node = current_head->next_node.load(std::memory_order_acquire); + LockFreeNode* marked_next = LockFreeNode::mark_node(next_node); + + if (current_head->next_node.compare_exchange_weak( + next_node, + marked_next, + std::memory_order_release, + std::memory_order_acquire + )) { + // The logical deletion was successful - move to the physical deletion. + LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); + head->next_node.compare_exchange_weak( + current_head, + unmarked_next, + std::memory_order_release, + std::memory_order_relaxed); + + // Ignore the CAS result and finish the deletion. delete nullptr semantics + // are also valid in C++. + delete current_head; + return node_value; + } else { + // CAS failed, someone updated the head->next. Retry in next iteration. + continue; + } + } + } + } + + std::optional pop_back() { + while (true) { + // Traverse to the node before-the-tail. + LockFreeNode* before_last = this->traverse_to_second_to_last(); + if (before_last == nullptr) { + return std::nullopt; + } + + LockFreeNode* last_node = before_last->next_node.load(std::memory_order_acquire); + + if (last_node == nullptr) { + continue; + } else if (LockFreeNode::is_marked(last_node)) { + this->try_help_advance(before_last, last_node); + continue; + } + + // At this point last_node should already be unmarked, but we try to unmark + // one more time for extra safety. + LockFreeNode* unmarked_last = LinkedListNode::unmark_node(last_node); + T node_value = unmarked_last->value(); + LockFreeNode* after_last = unmarked_last->next_node.load(std::memory_order_acquire); + LockFreeNode* marked_after = LinkedListNode::mark_node(after_last); + if (!unmarked_last->next_node.compare_exchange_weak( + after_last, + marked_after, + std::memory_order_release, + std::memory_order_acquire + )) { + continue; + } + + // At this point the logical deletion with marking was successful. Now we need + // to physically delete the node from the list AND from memory. + before_last->next_node.compare_exchange_weak( + unmarked_last, + nullptr, + std::memory_order_release, + std::memory_order_relaxed); + // We ignore all the potential CAS fails because after the logical delete + // succeeded our tail and before_last are already in a safe state. The potential + // fails only mean that other threads may have helped advancing the state. + tail.compare_exchange_weak( + unmarked_last, + before_last, + std::memory_order_release, + std::memory_order_relaxed); + + // Free the related memory after the successful physical deletion of the node. + delete unmarked_last; + return std::optional(node_value); + } + } + + std::optional pop_at(size_t index) { + while (true) { + LockFreeNode* before_node = this->traverse_to(index); + if (before_node == nullptr) { + return std::nullopt; + } + LockFreeNode* referenced_node = before_node->next_node.load(std::memory_order_acquire); + if (referenced_node == nullptr) { + return std::nullopt; + } + + if (LockFreeNode::is_marked(referenced_node)) { + // Help advance. Skip to next iteration. + this->try_help_advance(before_node, referenced_node); + continue; + } else { + LockFreeNode* unmarked_referenced = LockFreeNode::unmark_node(referenced_node); + T node_value = unmarked_referenced->value(); + LockFreeNode* next_node = unmarked_referenced->next_node.load(std::memory_order_acquire); + LockFreeNode* next_marked = LockFreeNode::mark_node(next_node); + if (LockFreeNode::is_marked(next_node)) { + // If the next node is logically deleted - help advance it and retry. + this->try_help_advance(before_node, unmarked_referenced); + continue; + } + + if (unmarked_referenced->next_node.compare_exchange_weak( + next_node, + next_marked, + std::memory_order_release, + std::memory_order_acquire + )) { + // Successful logical delete. Move to physical delete. The next_node + // is unmarked at this point due to the check above. + before_node->next_node.compare_exchange_weak( + unmarked_referenced, + next_node, + std::memory_order_release, + std::memory_order_relaxed); + + // If we popped the last node - try advance the tail to reference the + // previous node. + if (next_node == nullptr) { + tail.compare_exchange_weak( + unmarked_referenced, + before_node, + std::memory_order_release, + std::memory_order_relaxed); + } + + delete unmarked_referenced; + return std::optional(node_value); + } else { + // CAS update for the logical delete failed. Move to the next iteration. + continue; + } + } + } + } + + bool empty() { + return head->next_node.load(std::memory_order_acquire) == nullptr; + } + + size_t size() { + size_t iterator = 0; + LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); + + // Traverse to the end of the list and count the size as we go. + // Return as soon as we encounter the nullptr (meaning the end of the list) + while (current_node != nullptr) { + // No advance-helping. Size() function should be const, without implicitly + // advancing or changing the nodes. + if (LockFreeNode::is_marked(current_node)) { + LockFreeNode* current_unmarked = LockFreeNode::unmark_node(current_node); + current_node = current_unmarked->next_node.load(std::memory_order_acquire); + continue; + } + + iterator++; + current_node = current_node->next_node.load(std::memory_order_acquire); + } + + return iterator; + } + + bool contains(const T& value) { + LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); + + // Traverse to the end of the list and count the size as we go. + // Return as soon as we encounter the nullptr (meaning the end of the list) + while (current_node != nullptr) { + // No advance-helping. Contains() function should be const, without implicitly + // advancing or changing the nodes. + if (LockFreeNode::is_marked(current_node)) { + LockFreeNode* current_unmarked = LockFreeNode::unmark_node(current_node); + current_node = current_unmarked->next_node.load(std::memory_order_acquire); + continue; + } + + T node_value = current_node->value(); + if (node_value == value) { + return true; + } else { + current_node = current_node->next_node.load(std::memory_order_acquire); + } + } + + return false; + } + + LinkedListNode* find(const T& value) { + LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); + + // Traverse to the end of the list and count the size as we go. + // Return as soon as we encounter the nullptr (meaning the end of the list) + while (current_node != nullptr) { + // No advance-helping. Find() function should be const, without implicitly + // advancing or changing the nodes. + if (LockFreeNode::is_marked(current_node)) { + LockFreeNode* current_unmarked = LockFreeNode::unmark_node(current_node); + current_node = current_unmarked->next_node.load(std::memory_order_acquire); + continue; + } + + T node_value = current_node->value(); + if (node_value == value) { + return current_node; + } else { + current_node = current_node->next_node.load(std::memory_order_acquire); + } + } + + return nullptr; + } + }; + + template + class LockFreeLinkedList final : public LinkedList { + private: + std::unique_ptr> impl; + public: + ~LockFreeLinkedList() override = default; + + LockFreeLinkedList() + : impl(std::make_unique>()) + {} + + LockFreeLinkedList(const LockFreeLinkedList& other) = delete; + LockFreeLinkedList& operator=(const LockFreeLinkedList& other) = delete; + LockFreeLinkedList(LockFreeLinkedList&& other) = delete; + LockFreeLinkedList& operator=(LockFreeLinkedList&& other) = delete; + + void push_front(T item) override { + impl->push_front(item); + } + void push_front(T&& item) override { + impl->push_front(std::move(item)); + } + void push_front(const T& item) override { + impl->push_front(item); + } + + void push_back(T item) override { + impl->push_back(item); + } + void push_back(T&& item) override { + impl->push_back(std::move(item)); + } + void push_back(const T& item) override { + impl->push_back(item); + } + + LinkedListNode* push_at(const T& item, const size_t index) override { + return impl->push_at(item, index); + } + LinkedListNode* push_at(T item, const size_t index) override { + return impl->push_at(item, index); + } + LinkedListNode* push_at(T&& item, const size_t index) override { + return impl->push_at(std::move(item), index); + } + + std::optional pop_front() override { + return impl->pop_front(); + } + std::optional pop_back() override { + return impl->pop_back(); + } + std::optional pop_at(size_t index) override { + return impl->pop_at(index); + } + + bool empty() override { + return impl->empty(); + } + size_t size() override { + return impl->size(); + } + bool contains(T value) override { + return impl->contains(value); + } + LinkedListNode* find(T value) override { + return impl->find(value); + } + }; +} // namespace multithreading::structures::linked_list \ No newline at end of file From 19e3ed255460bd7e7d99f3435373e72bc193141a Mon Sep 17 00:00:00 2001 From: isoldpower Date: Mon, 2 Feb 2026 14:08:23 -0600 Subject: [PATCH 08/19] feat: benchmark for the LinkedList structure ALERT: The benchmark is not running at this point due to the memory reclamation issues --- executables/CMakeLists.txt | 7 +- .../benchmarks/include/LinkedListBenchmark.h | 17 + .../include/UnboundedQueueBenchmark.h | 2 +- .../include/mcmp/LinkedListMCMPBenchmark.h | 26 ++ .../benchmarks/src/LinkedListBenchmark.cpp | 10 + .../src/mcmp/LinkedListMCMPBenchmark.cpp | 35 ++ executables/linked_list_benchmark.cpp | 91 ++++ .../bounded_queue/LockFreeBoundedQueue.h | 411 +++++++++--------- .../include/linked_list/FGLockLinkedList.h | 30 +- .../include/linked_list/LinkedList.h | 11 +- .../include/linked_list/LockFreeLinkedList.h | 77 ++-- 11 files changed, 434 insertions(+), 283 deletions(-) create mode 100644 executables/benchmarks/include/LinkedListBenchmark.h create mode 100644 executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h create mode 100644 executables/benchmarks/src/LinkedListBenchmark.cpp create mode 100644 executables/benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp create mode 100644 executables/linked_list_benchmark.cpp diff --git a/executables/CMakeLists.txt b/executables/CMakeLists.txt index c91d887..93023a0 100644 --- a/executables/CMakeLists.txt +++ b/executables/CMakeLists.txt @@ -6,6 +6,11 @@ add_executable(bounded_queue_benchmark bounded_queue_benchmark.cpp benchmarks/src/BoundedQueueBenchmark.cpp benchmarks/src/mcmp/BoundedQueueMCMPBenchmark.cpp) +add_executable(linked_list_benchmark + linked_list_benchmark.cpp + benchmarks/src/LinkedListBenchmark.cpp + benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp) target_link_libraries(unbounded_queue_benchmark PRIVATE utilities structures cxx_setup) -target_link_libraries(bounded_queue_benchmark PRIVATE utilities structures cxx_setup) \ No newline at end of file +target_link_libraries(bounded_queue_benchmark PRIVATE utilities structures cxx_setup) +target_link_libraries(linked_list_benchmark PRIVATE utilities structures cxx_setup) \ No newline at end of file diff --git a/executables/benchmarks/include/LinkedListBenchmark.h b/executables/benchmarks/include/LinkedListBenchmark.h new file mode 100644 index 0000000..c69926c --- /dev/null +++ b/executables/benchmarks/include/LinkedListBenchmark.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + + +namespace executables::benchmarks { + + class LinkedListBenchmark { + protected: + const std::shared_ptr> &benchmark_list; + + explicit LinkedListBenchmark( + const std::shared_ptr> &list + ); + }; +} // namespace executables::benchmarks \ No newline at end of file diff --git a/executables/benchmarks/include/UnboundedQueueBenchmark.h b/executables/benchmarks/include/UnboundedQueueBenchmark.h index 3aabf11..9ba238d 100644 --- a/executables/benchmarks/include/UnboundedQueueBenchmark.h +++ b/executables/benchmarks/include/UnboundedQueueBenchmark.h @@ -12,7 +12,7 @@ namespace executables::benchmarks { std::shared_ptr< multithreading::structures::unbounded_queue::UnboundedQueue > benchmark_queue; - public: + explicit UnboundedQueueBenchmark( const std::shared_ptr< multithreading::structures::unbounded_queue::UnboundedQueue> &queue diff --git a/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h b/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h new file mode 100644 index 0000000..66679b0 --- /dev/null +++ b/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + +#include "../LinkedListBenchmark.h" +#include "multithreading/structures/include/linked_list/FGLockLinkedList.h" + +namespace executables::benchmarks::mcmp { + + class LinkedListMCMPBenchmark final + : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + , LinkedListBenchmark + { + public: + explicit LinkedListMCMPBenchmark( + const std::shared_ptr< + multithreading::structures::linked_list::LinkedList + > &list + ); + + void producer_routine(size_t threadSize) override; + void consumer_routine(size_t threadSize) override; + }; +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/benchmarks/src/LinkedListBenchmark.cpp b/executables/benchmarks/src/LinkedListBenchmark.cpp new file mode 100644 index 0000000..1de1bcb --- /dev/null +++ b/executables/benchmarks/src/LinkedListBenchmark.cpp @@ -0,0 +1,10 @@ +#include "../include/LinkedListBenchmark.h" + +namespace executables::benchmarks { + + LinkedListBenchmark::LinkedListBenchmark( + const std::shared_ptr> &list + ) + : benchmark_list(list) + {} +} // namespace executables::benchmarks diff --git a/executables/benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp b/executables/benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp new file mode 100644 index 0000000..ff31adb --- /dev/null +++ b/executables/benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp @@ -0,0 +1,35 @@ +#include "../../include/mcmp/LinkedListMCMPBenchmark.h" + +#include +#include +#include + + +namespace executables::benchmarks::mcmp { + + constexpr size_t MAX_CONSUME_RETRIES = 100; + + LinkedListMCMPBenchmark::LinkedListMCMPBenchmark( + const std::shared_ptr> &list + ) + : LinkedListBenchmark(list) + {} + + void LinkedListMCMPBenchmark::producer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + benchmark_list->push_front(static_cast(j)); + } + } + + void LinkedListMCMPBenchmark::consumer_routine(const size_t threadSize) { + for (size_t j = 0; j < threadSize; j++) { + size_t retries = 0; + while (!benchmark_list->pop_front().has_value()) { + if (++retries > MAX_CONSUME_RETRIES) { + std::this_thread::yield(); + retries = 0; + } + } + } + } +} // namespace executables::benchmarks::mcmp \ No newline at end of file diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp new file mode 100644 index 0000000..7c6a75b --- /dev/null +++ b/executables/linked_list_benchmark.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +#include +#include +// #include + +#include +#include +#include + +#include "./benchmarks/include/ThreadConfig.h" +#include "./benchmarks/include/mcmp/LinkedListMCMPBenchmark.h" + +using multithreading::structures::linked_list::LinkedList; +using multithreading::structures::linked_list::LockFreeLinkedList; +// using multithreading::structures::linked_list::FGLockLinkedList; + +using multithreading::utilities::benchmark::BenchmarkTask; +using multithreading::utilities::benchmark::ProducerConsumerBenchmark; +using multithreading::utilities::benchmark::BenchmarkMeasurer; +using multithreading::utilities::benchmark::BenchmarkRunner; +using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; +using executables::benchmarks::mcmp::LinkedListMCMPBenchmark; + +using executables::benchmarks::THREADS_COUNT; +using executables::benchmarks::THREAD_SIZE; + +namespace executables { + template + static void benchmarkApplication(const std::array>, N>& lists) { + const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, + .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + }; + const BenchmarkMeasurer matrixMeasurer(matrix); + + for (const auto &list : lists) { + std::shared_ptr const benchmark = + std::make_shared(list.structure); + std::shared_ptr const runner = + std::make_shared(benchmark); + + std::cout << list.title << "\n"; + const auto results = matrixMeasurer.measure_benchmark(runner); + for (const auto &[threads_count, thread_size, execution_time] : results) { + std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; + std::cout << "\tExecution time: " << execution_time.count() << "mics\n"; + } + std::cout << '\n'; + } + } +} // namespace executables + +auto main() -> int { + multithreading::utilities::Application benchmarkApplication( + multithreading::utilities::ApplicationInfo{ + .appName="Linked List Benchmark", + .appVersion="1.0.0", + .beforeTask = std::nullopt, + .afterTask = std::nullopt + } + ); + + const std::array lists { + BenchmarkTask>( + std::make_shared>(), + "Lock-free Linked List Benchmark" + ), + // BenchmarkTask>( + // std::make_shared>(), + // "Fine-Grained Linked List Benchmark" + // ), + }; + + try { + const std::optional executionResult = benchmarkApplication.SafeStart([&]() { + executables::benchmarkApplication(lists); + return 1; + }); + + return executionResult.has_value() ? executionResult.value() : -1; + } catch (std::exception& e) { + std::cerr << e.what() << '\n'; + + return -1; + } +} \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h index df74415..11b49ac 100644 --- a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h @@ -9,243 +9,246 @@ namespace multithreading::structures::bounded_queue { -template -struct SlotValue { -private: - // Generally speaking, the purpose is to reserve the memory of desired size and with - // desired align that type is using, but leave the possibility to not always store the value. - // Modern C++ versions support the std::optional which gives same functionality. - std::aligned_storage_t storage; - - T* get_pointer() { return reinterpret_cast(&storage); } - - const T* get_pointer() const { return reinterpret_cast(&storage); } - -public: - void emplace(const T& value) { - // We get the pointer - new (get_pointer()) T(value); - } - - void emplace(T&& value) { new (get_pointer()) T(std::move(value)); } - - void erase() { (*get_pointer()).~T(); } - - T* get() { return get_pointer(); } - - const T* get() const { return get_pointer(); } - - T take() { - T result = std::move(*get_pointer()); - erase(); - return result; - } -}; - -// Sequence has 2 states: -// 1) 'full' (lap_number * capacity) + 1: (i.e. capacity = 10, then 1, 11, 21, 31... indicate -// 'full') 2) 'empty' (lap_number * capacity): (i.e. capacity = 10, then 0, 10, 20, 30... indicate -// 'empty') -template -struct CircularSlot { -public: - std::atomic sequence; - SlotValue value; -}; - -template -class LockFreeBoundedQueueImpl { -private: - const size_t capacity; - std::vector> data; - - std::counting_semaphore<> items_available; - alignas(64) utilities::performance::AlignedField> enqueue_pos; - alignas(64) std::atomic dequeue_pos; - -public: - explicit LockFreeBoundedQueueImpl(const size_t capacity) - : capacity(capacity) - , data(capacity) - , items_available(0) - , enqueue_pos(0) - , dequeue_pos(0) - { - for (size_t i = 0; i < capacity; ++i) { - data[i].sequence.store(i, std::memory_order_relaxed); + template + struct SlotValue { + private: + // Generally speaking, the purpose is to reserve the memory of desired size and with + // desired align that type is using, but leave the possibility to not always store the value. + // Modern C++ versions support the std::optional which gives same functionality. + std::aligned_storage_t storage; + + T* get_pointer() { return reinterpret_cast(&storage); } + + const T* get_pointer() const { return reinterpret_cast(&storage); } + + public: + void emplace(const T& value) { + // We get the pointer + new (get_pointer()) T(value); } - } - LockFreeBoundedQueueImpl(LockFreeBoundedQueueImpl&&) = delete; - LockFreeBoundedQueueImpl(const LockFreeBoundedQueueImpl&) = delete; - LockFreeBoundedQueueImpl& operator=(const LockFreeBoundedQueueImpl&) = delete; - LockFreeBoundedQueueImpl& operator=(LockFreeBoundedQueueImpl&&) = delete; + void emplace(T&& value) { new (get_pointer()) T(std::move(value)); } - ~LockFreeBoundedQueueImpl() { - while (try_dequeue().has_value()) { + void erase() { (*get_pointer()).~T(); } + + T* get() { return get_pointer(); } + + const T* get() const { return get_pointer(); } + + T take() { + T result = std::move(*get_pointer()); + erase(); + return result; } }; - std::optional try_dequeue() { - while (true) { - size_t position = dequeue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - - if (sequence == position) { - // Sequence indicates 'empty' state. Probably means that our queue is empty. - return std::nullopt; - } - if (sequence != position + 1) { - // Sequence does not indicate 'Full' state, the slot is not ready to be read - // by consumer. Continue spinning. - continue; - } + // Sequence has 2 states: + // 1) 'full' (lap_number * capacity) + 1: (i.e. capacity = 10, then 1, 11, 21, 31... indicate + // 'full') 2) 'empty' (lap_number * capacity): (i.e. capacity = 10, then 0, 10, 20, 30... indicate + // 'empty') + template + struct CircularSlot { + public: + std::atomic sequence; + SlotValue value; + }; - // CAS to ensure that dequeue_pos didn't update while we were proceeding. This way - // we will either claim the position or identify race condition loss and retry. - if (dequeue_pos.compare_exchange_weak( - position, - position + 1, - std::memory_order_acquire, - std::memory_order_relaxed) - ) { - // Position successfully claimed, proceed and return the value. - T result = data[lap_position].value.take(); - data[lap_position].sequence.store(position + capacity, std::memory_order_release); - return result; + template + class LockFreeBoundedQueueImpl { + private: + const size_t capacity; + std::vector> data; + + std::counting_semaphore<> items_available; + alignas(64) utilities::performance::AlignedField> enqueue_pos; + alignas(64) std::atomic dequeue_pos; + + public: + explicit LockFreeBoundedQueueImpl(const size_t capacity) + : capacity(capacity) + , data(capacity) + , items_available(0) + , enqueue_pos(0) + , dequeue_pos(0) + { + for (size_t i = 0; i < capacity; ++i) { + data[i].sequence.store(i, std::memory_order_relaxed); } } - } - - std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) { - const auto deadline = std::chrono::steady_clock::now() + timeout; - - // We don't check the time equality inside the while loop as it is ID-based and - // affects performance in a bad way. - while (true) { - const auto now_time = std::chrono::steady_clock::now(); - if (now_time >= deadline) { - // Time limit exceeded, break from cycle. - break; + + LockFreeBoundedQueueImpl(LockFreeBoundedQueueImpl&&) = delete; + LockFreeBoundedQueueImpl(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(const LockFreeBoundedQueueImpl&) = delete; + LockFreeBoundedQueueImpl& operator=(LockFreeBoundedQueueImpl&&) = delete; + + ~LockFreeBoundedQueueImpl() { + while (try_dequeue().has_value()) { } + }; + + std::optional try_dequeue() { + while (true) { + size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - // Wait for time left after previous iterations. - if (items_available.try_acquire_for(deadline - now_time)) { - // If the value added to semaphore - try to dequeue it. It can still fail - // in a racing condition with common .try_dequeue() callers. - if (auto result = try_dequeue(); result.has_value()) { - // Dequeue succeeded - return the result of operation. + if (sequence == position) { + // Sequence indicates 'empty' state. Probably means that our queue is empty. + return std::nullopt; + } + if (sequence != position + 1) { + // Sequence does not indicate 'Full' state, the slot is not ready to be read + // by consumer. Continue spinning. + continue; + } + + // CAS to ensure that dequeue_pos didn't update while we were proceeding. This way + // we will either claim the position or identify race condition loss and retry. + if (dequeue_pos.compare_exchange_weak( + position, + position + 1, + std::memory_order_acquire, + std::memory_order_relaxed) + ) { + // Position successfully claimed, proceed and return the value. + T result = data[lap_position].value.take(); + data[lap_position].sequence.store(position + capacity, std::memory_order_release); return result; } - } else { - // Time limit exceeded, break from cycle. - break; } } - // Final attempt to dequeue the value. - return try_dequeue(); - } - - bool try_enqueue(T value) { - while (true) { - size_t position = enqueue_pos->load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - - if (sequence < position) { - // The slot we are trying to address is still on the previous lap, - // which means that it is full and have not been read. It means that the - // queue is full - if (enqueue_pos->load(std::memory_order_acquire) == position) { - // Ensure atomic state didn't change and if it didn't return false. - return false; + std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) { + const auto deadline = std::chrono::steady_clock::now() + timeout; + + // We don't check the time equality inside the while loop as it is ID-based and + // affects performance in a bad way. + while (true) { + const auto now_time = std::chrono::steady_clock::now(); + if (now_time >= deadline) { + // Time limit exceeded, break from cycle. + break; + } + + // Wait for time left after previous iterations. + if (items_available.try_acquire_for(deadline - now_time)) { + // If the value added to semaphore - try to dequeue it. It can still fail + // in a racing condition with common .try_dequeue() callers. + if (auto result = try_dequeue(); result.has_value()) { + // Dequeue succeeded - return the result of operation. + return result; + } + } else { + // Time limit exceeded, break from cycle. + break; } - // Atomic state changed, probably we've got new space to enqueue. - // Retry in next iteration. - continue; - } else if (sequence > position) { - // We lost racing condition and someone has claimed our slot before we did. - // Continue to get fresh enqueue position in next iteration. - continue; - } else { - // Slot is ready and in the correct 'Empty' state. Proceed with CAS operation - // to ensure position didn't change since the beginning of the operation. - if (enqueue_pos->compare_exchange_weak(position, position + 1, - std::memory_order_relaxed, - std::memory_order_relaxed)) { - data[lap_position].value.emplace(std::move(value)); - data[lap_position].sequence.store(position + 1, std::memory_order_release); - items_available.release(1); - return true; + } + + // Final attempt to dequeue the value. + return try_dequeue(); + } + + bool try_enqueue(T value) { + while (true) { + size_t position = enqueue_pos->load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (sequence < position) { + // The slot we are trying to address is still on the previous lap, + // which means that it is full and have not been read. It means that the + // queue is full + if (enqueue_pos->load(std::memory_order_acquire) == position) { + // Ensure atomic state didn't change and if it didn't return false. + return false; + } + // Atomic state changed, probably we've got new space to enqueue. + // Retry in next iteration. + continue; + } else if (sequence > position) { + // We lost racing condition and someone has claimed our slot before we did. + // Continue to get fresh enqueue position in next iteration. + continue; + } else { + // Slot is ready and in the correct 'Empty' state. Proceed with CAS operation + // to ensure position didn't change since the beginning of the operation. + if (enqueue_pos->compare_exchange_weak(position, position + 1, + std::memory_order_relaxed, + std::memory_order_relaxed)) { + data[lap_position].value.emplace(std::move(value)); + data[lap_position].sequence.store(position + 1, std::memory_order_release); + items_available.release(1); + return true; + } } } } - } - - bool is_empty() const { - while (true) { - const size_t position = dequeue_pos.load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - - if (position == dequeue_pos.load(std::memory_order_acquire)) { - // If the sequence equals to position, it means that Slot state is 'Empty', which - // means that the queue is empty. - return sequence == position; + + bool is_empty() const { + while (true) { + const size_t position = dequeue_pos.load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (position == dequeue_pos.load(std::memory_order_acquire)) { + // If the sequence equals to position, it means that Slot state is 'Empty', which + // means that the queue is empty. + return sequence == position; + } } } - } - - bool is_full() const { - while (true) { - const size_t position = enqueue_pos->load(std::memory_order_relaxed); - const size_t lap_position = position % capacity; - const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); - - if (position == enqueue_pos->load(std::memory_order_acquire)) { - // If the sequence is less than position, it means that our sequence is - // still on previous lap and wasn't read. Indicates that queue is full. - return sequence < position; + + bool is_full() const { + while (true) { + const size_t position = enqueue_pos->load(std::memory_order_relaxed); + const size_t lap_position = position % capacity; + const size_t sequence = data[lap_position].sequence.load(std::memory_order_acquire); + + if (position == enqueue_pos->load(std::memory_order_acquire)) { + // If the sequence is less than position, it means that our sequence is + // still on previous lap and wasn't read. Indicates that queue is full. + return sequence < position; + } } } - } -}; + }; -template -class LockFreeBoundedQueue : public BoundedQueue { -private: - LockFreeBoundedQueueImpl impl; + template + class LockFreeBoundedQueue : public BoundedQueue { + private: + LockFreeBoundedQueueImpl impl; -public: - explicit LockFreeBoundedQueue(const size_t capacity) : BoundedQueue(), impl(capacity) {} + public: + explicit LockFreeBoundedQueue(const size_t capacity) + : BoundedQueue() + , impl(capacity) + {} - ~LockFreeBoundedQueue() override = default; + ~LockFreeBoundedQueue() override = default; - LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; - LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; - LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; - LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; + LockFreeBoundedQueue(LockFreeBoundedQueue&&) = delete; + LockFreeBoundedQueue(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(const LockFreeBoundedQueue&) = delete; + LockFreeBoundedQueue& operator=(LockFreeBoundedQueue&&) = delete; - std::optional try_dequeue() override { return impl.try_dequeue(); } + std::optional try_dequeue() override { return impl.try_dequeue(); } - std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) override { - return impl.wait_dequeue(timeout); - } + std::optional wait_dequeue(const std::chrono::steady_clock::duration& timeout) override { + return impl.wait_dequeue(timeout); + } - std::future> wait_dequeue_async( - const std::chrono::steady_clock::duration& timeout) override { - return std::async(std::launch::async, - [this, timeout]() -> auto { return this->impl.wait_dequeue(timeout); }); - } + std::future> wait_dequeue_async( + const std::chrono::steady_clock::duration& timeout) override { + return std::async(std::launch::async, + [this, timeout]() -> auto { return this->impl.wait_dequeue(timeout); }); + } - bool try_enqueue(const T& value) override { return impl.try_enqueue(value); } + bool try_enqueue(const T& value) override { return impl.try_enqueue(value); } - bool try_enqueue(T&& value) override { return impl.try_enqueue(std::move(value)); } + bool try_enqueue(T&& value) override { return impl.try_enqueue(std::move(value)); } - [[nodiscard]] bool is_empty(bool) const override { return impl.is_empty(); } + [[nodiscard]] bool is_empty(bool) const override { return impl.is_empty(); } - [[nodiscard]] bool is_full(bool) const override { return impl.is_full(); } -}; + [[nodiscard]] bool is_full(bool) const override { return impl.is_full(); } + }; } // namespace multithreading::structures::bounded_queue \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/FGLockLinkedList.h b/multithreading/structures/include/linked_list/FGLockLinkedList.h index f683760..1cd263e 100644 --- a/multithreading/structures/include/linked_list/FGLockLinkedList.h +++ b/multithreading/structures/include/linked_list/FGLockLinkedList.h @@ -302,7 +302,7 @@ namespace multithreading::structures::linked_list { iterator_node->dispose(); iterator_node = iterator_next; - iterator_next = iterator_node->iterator_next(); + iterator_next = iterator_node->next(); } // Free the last node and return false. @@ -331,7 +331,7 @@ namespace multithreading::structures::linked_list { iterator_node->dispose(); iterator_node = iterator_next; - iterator_next = iterator_node->iterator_next(); + iterator_next = iterator_node->next(); } // Free the last node and return false. @@ -357,30 +357,10 @@ namespace multithreading::structures::linked_list { void push_front(T item) override { impl->push_front(item); } - void push_front(T&& item) override { - impl->push_front(std::move(item)); - } - void push_front(const T& item) override { - impl->push_front(item); - } - void push_back(T item) override { impl->push_back(item); } - void push_back(T&& item) override { - impl->push_back(std::move(item)); - } - void push_back(const T& item) override { - impl->push_back(item); - } - - LinkedListNode* push_at(T item, size_t index) override { - return impl->push_at(item, index); - } - LinkedListNode* push_at(T&& item, size_t index) override { - return impl->push_at(std::move(item), index); - } - LinkedListNode* push_at(const T& item, size_t index) override { + bool push_at(T item, size_t index) override { return impl->push_at(item, index); } @@ -405,9 +385,5 @@ namespace multithreading::structures::linked_list { bool contains(T value) override { return impl->contains(value); } - - LinkedListNode* find(T value) override { - return impl->find(value); - } }; } // namespace multithreading::structures::linked_list \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/LinkedList.h b/multithreading/structures/include/linked_list/LinkedList.h index 41fe9f9..59c1f5f 100644 --- a/multithreading/structures/include/linked_list/LinkedList.h +++ b/multithreading/structures/include/linked_list/LinkedList.h @@ -18,11 +18,17 @@ namespace multithreading::structures::linked_list { template class LinkedList { public: - virtual ~LinkedList() = 0; + LinkedList() = default; + virtual ~LinkedList() = default; + + LinkedList(const LinkedList&) = delete; + LinkedList& operator=(const LinkedList&) = delete; + LinkedList(LinkedList&&) = delete; + LinkedList& operator=(LinkedList&&) = delete; virtual void push_front(T item) = 0; virtual void push_back(T item) = 0; - virtual LinkedListNode* push_at(T item, size_t index) = 0; + virtual bool push_at(T item, size_t index) = 0; virtual std::optional pop_front() = 0; virtual std::optional pop_back() = 0; @@ -31,6 +37,5 @@ namespace multithreading::structures::linked_list { virtual bool empty() = 0; virtual size_t size() = 0; virtual bool contains(T value) = 0; - virtual LinkedListNode* find(T value) = 0; }; } // namespace multithreading::structures::linked_list \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/LockFreeLinkedList.h b/multithreading/structures/include/linked_list/LockFreeLinkedList.h index 157b658..779642f 100644 --- a/multithreading/structures/include/linked_list/LockFreeLinkedList.h +++ b/multithreading/structures/include/linked_list/LockFreeLinkedList.h @@ -1,16 +1,29 @@ #pragma once +#include +#include + #include "./LinkedList.h" + namespace multithreading::structures::linked_list { template - struct alignas(8) LockFreeNode { + struct alignas(8) LockFreeNode : public LinkedListNode { private: + static constexpr uintptr_t MARK_BIT = 0x1; + public: std::atomic*> next_node; - T node_value; - static constexpr uintptr_t MARK_BIT = 0x1; + LockFreeNode() + : LinkedListNode(T{}) + , next_node(nullptr) + {} + + explicit LockFreeNode(T value) + : LinkedListNode(value) + , next_node(nullptr) + {} static bool is_marked(LockFreeNode* node) { return (reinterpret_cast(node) & MARK_BIT) != 0; @@ -18,7 +31,7 @@ namespace multithreading::structures::linked_list { static LockFreeNode* unmark_node(LockFreeNode* node) { return reinterpret_cast*>( - reinterpret_cast(node & ~MARK_BIT) + reinterpret_cast(node) & ~MARK_BIT ); } @@ -27,16 +40,6 @@ namespace multithreading::structures::linked_list { reinterpret_cast(node) | MARK_BIT ); } - public: - LockFreeNode() - : next_node(nullptr) - , node_value(T{}) - {} - - explicit LockFreeNode(T value) - : next_node(nullptr) - , node_value(value) - {} [[nodiscard]] T value() { return this->node_value; @@ -196,7 +199,7 @@ namespace multithreading::structures::linked_list { } } - LinkedListNode* push_at(T item, const size_t index) { + ssize_t push_at(T item, const size_t index) { LockFreeNode* new_node = new LockFreeNode(item); while (true) { @@ -204,7 +207,7 @@ namespace multithreading::structures::linked_list { // If we are out-of-bounds in traverse then simply delete the new node and return. if (current_node == nullptr) { delete new_node; - return nullptr; + return -1; } LockFreeNode* next_node = current_node->next_node.load(std::memory_order_acquire); @@ -230,7 +233,7 @@ namespace multithreading::structures::linked_list { std::memory_order_relaxed); } - return new_node; + return 0; } else { // Our current_node changed somewhere in the process. // Retry on next iteration. @@ -301,10 +304,10 @@ namespace multithreading::structures::linked_list { // At this point last_node should already be unmarked, but we try to unmark // one more time for extra safety. - LockFreeNode* unmarked_last = LinkedListNode::unmark_node(last_node); + LockFreeNode* unmarked_last = LockFreeNode::unmark_node(last_node); T node_value = unmarked_last->value(); LockFreeNode* after_last = unmarked_last->next_node.load(std::memory_order_acquire); - LockFreeNode* marked_after = LinkedListNode::mark_node(after_last); + LockFreeNode* marked_after = LockFreeNode::mark_node(after_last); if (!unmarked_last->next_node.compare_exchange_weak( after_last, marked_after, @@ -474,16 +477,17 @@ namespace multithreading::structures::linked_list { }; template - class LockFreeLinkedList final : public LinkedList { + class LockFreeLinkedList : public LinkedList { private: std::unique_ptr> impl; public: - ~LockFreeLinkedList() override = default; - LockFreeLinkedList() - : impl(std::make_unique>()) + : LinkedList() + , impl(std::make_unique>()) {} + ~LockFreeLinkedList() override = default; + LockFreeLinkedList(const LockFreeLinkedList& other) = delete; LockFreeLinkedList& operator=(const LockFreeLinkedList& other) = delete; LockFreeLinkedList(LockFreeLinkedList&& other) = delete; @@ -492,31 +496,13 @@ namespace multithreading::structures::linked_list { void push_front(T item) override { impl->push_front(item); } - void push_front(T&& item) override { - impl->push_front(std::move(item)); - } - void push_front(const T& item) override { - impl->push_front(item); - } - void push_back(T item) override { impl->push_back(item); } - void push_back(T&& item) override { - impl->push_back(std::move(item)); - } - void push_back(const T& item) override { - impl->push_back(item); - } + bool push_at(T item, const size_t index) override { + const ssize_t operation_result = impl->push_at(item, index); - LinkedListNode* push_at(const T& item, const size_t index) override { - return impl->push_at(item, index); - } - LinkedListNode* push_at(T item, const size_t index) override { - return impl->push_at(item, index); - } - LinkedListNode* push_at(T&& item, const size_t index) override { - return impl->push_at(std::move(item), index); + return operation_result == 0; } std::optional pop_front() override { @@ -538,8 +524,5 @@ namespace multithreading::structures::linked_list { bool contains(T value) override { return impl->contains(value); } - LinkedListNode* find(T value) override { - return impl->find(value); - } }; } // namespace multithreading::structures::linked_list \ No newline at end of file From e74e0fdccf250d12fb796142047bb3c73a33f73e Mon Sep 17 00:00:00 2001 From: isoldpower Date: Thu, 5 Feb 2026 00:34:34 -0600 Subject: [PATCH 09/19] feat: epoch-based memory reclamation mechanism for safer list nodes deletion BREAKING CHANGE: for now the benchmark fails with double deletion error. It probably stems from racing conditions --- executables/linked_list_benchmark.cpp | 12 +- multithreading/structures/CMakeLists.txt | 2 +- .../include/linked_list/LockFreeLinkedList.h | 28 +++- .../include/performance/EpochGuard.h | 28 ++++ .../include/performance/EpochReclamation.h | 155 ++++++++++++++++++ 5 files changed, 213 insertions(+), 12 deletions(-) create mode 100644 multithreading/utilities/include/performance/EpochGuard.h create mode 100644 multithreading/utilities/include/performance/EpochReclamation.h diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index 7c6a75b..c83663c 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -6,7 +6,7 @@ #include #include -// #include +#include #include #include @@ -17,7 +17,7 @@ using multithreading::structures::linked_list::LinkedList; using multithreading::structures::linked_list::LockFreeLinkedList; -// using multithreading::structures::linked_list::FGLockLinkedList; +using multithreading::structures::linked_list::FGLockLinkedList; using multithreading::utilities::benchmark::BenchmarkTask; using multithreading::utilities::benchmark::ProducerConsumerBenchmark; @@ -70,10 +70,10 @@ auto main() -> int { std::make_shared>(), "Lock-free Linked List Benchmark" ), - // BenchmarkTask>( - // std::make_shared>(), - // "Fine-Grained Linked List Benchmark" - // ), + BenchmarkTask>( + std::make_shared>(), + "Fine-Grained Linked List Benchmark" + ), }; try { diff --git a/multithreading/structures/CMakeLists.txt b/multithreading/structures/CMakeLists.txt index be011c4..9635c1c 100644 --- a/multithreading/structures/CMakeLists.txt +++ b/multithreading/structures/CMakeLists.txt @@ -9,4 +9,4 @@ if (BUILD_TESTING) endif() include_directories(include) -target_link_libraries(structures PUBLIC cxx_setup) \ No newline at end of file +target_link_libraries(structures PUBLIC cxx_setup utilities) \ No newline at end of file diff --git a/multithreading/structures/include/linked_list/LockFreeLinkedList.h b/multithreading/structures/include/linked_list/LockFreeLinkedList.h index 779642f..c59431b 100644 --- a/multithreading/structures/include/linked_list/LockFreeLinkedList.h +++ b/multithreading/structures/include/linked_list/LockFreeLinkedList.h @@ -2,10 +2,11 @@ #include #include +#include +#include #include "./LinkedList.h" - namespace multithreading::structures::linked_list { template @@ -52,6 +53,8 @@ namespace multithreading::structures::linked_list { LockFreeNode* head; std::atomic*> tail; + utilities::performance::EpochReclamation> epoch_reclamation; + void try_help_advance(LockFreeNode* node, LockFreeNode* next_node) { if (node == nullptr) { return; @@ -121,6 +124,7 @@ namespace multithreading::structures::linked_list { LockFreeLinkedListImpl() : head(new LockFreeNode()) , tail(head) + , epoch_reclamation() {} ~LockFreeLinkedListImpl() { @@ -129,6 +133,7 @@ namespace multithreading::structures::linked_list { } void push_front(T item) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); LockFreeNode* new_node = new LockFreeNode(item); while (true) { @@ -151,6 +156,7 @@ namespace multithreading::structures::linked_list { } void push_back(T item) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); LockFreeNode* new_node = new LockFreeNode(item); while (true) { @@ -200,6 +206,7 @@ namespace multithreading::structures::linked_list { } ssize_t push_at(T item, const size_t index) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); LockFreeNode* new_node = new LockFreeNode(item); while (true) { @@ -244,6 +251,8 @@ namespace multithreading::structures::linked_list { } std::optional pop_front() { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + while (true) { LockFreeNode* current_head = head->next_node.load(std::memory_order_acquire); if (LockFreeNode::is_marked(current_head)) { @@ -266,6 +275,7 @@ namespace multithreading::structures::linked_list { std::memory_order_acquire )) { // The logical deletion was successful - move to the physical deletion. + epoch_reclamation.retire_reference(current_head); LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); head->next_node.compare_exchange_weak( current_head, @@ -275,7 +285,6 @@ namespace multithreading::structures::linked_list { // Ignore the CAS result and finish the deletion. delete nullptr semantics // are also valid in C++. - delete current_head; return node_value; } else { // CAS failed, someone updated the head->next. Retry in next iteration. @@ -286,6 +295,8 @@ namespace multithreading::structures::linked_list { } std::optional pop_back() { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + while (true) { // Traverse to the node before-the-tail. LockFreeNode* before_last = this->traverse_to_second_to_last(); @@ -319,6 +330,7 @@ namespace multithreading::structures::linked_list { // At this point the logical deletion with marking was successful. Now we need // to physically delete the node from the list AND from memory. + epoch_reclamation.retire_reference(unmarked_last); before_last->next_node.compare_exchange_weak( unmarked_last, nullptr, @@ -333,13 +345,13 @@ namespace multithreading::structures::linked_list { std::memory_order_release, std::memory_order_relaxed); - // Free the related memory after the successful physical deletion of the node. - delete unmarked_last; return std::optional(node_value); } } std::optional pop_at(size_t index) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + while (true) { LockFreeNode* before_node = this->traverse_to(index); if (before_node == nullptr) { @@ -373,6 +385,7 @@ namespace multithreading::structures::linked_list { )) { // Successful logical delete. Move to physical delete. The next_node // is unmarked at this point due to the check above. + epoch_reclamation.retire_reference(unmarked_referenced); before_node->next_node.compare_exchange_weak( unmarked_referenced, next_node, @@ -389,7 +402,6 @@ namespace multithreading::structures::linked_list { std::memory_order_relaxed); } - delete unmarked_referenced; return std::optional(node_value); } else { // CAS update for the logical delete failed. Move to the next iteration. @@ -404,6 +416,8 @@ namespace multithreading::structures::linked_list { } size_t size() { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + size_t iterator = 0; LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); @@ -426,6 +440,8 @@ namespace multithreading::structures::linked_list { } bool contains(const T& value) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); // Traverse to the end of the list and count the size as we go. @@ -451,6 +467,8 @@ namespace multithreading::structures::linked_list { } LinkedListNode* find(const T& value) { + utilities::performance::EpochGuard> epoch_guard(&epoch_reclamation); + LockFreeNode* current_node = head->next_node.load(std::memory_order_acquire); // Traverse to the end of the list and count the size as we go. diff --git a/multithreading/utilities/include/performance/EpochGuard.h b/multithreading/utilities/include/performance/EpochGuard.h new file mode 100644 index 0000000..06535ca --- /dev/null +++ b/multithreading/utilities/include/performance/EpochGuard.h @@ -0,0 +1,28 @@ +#pragma once + +#include "./EpochReclamation.h" + + +namespace multithreading::utilities::performance { + + template + class EpochGuard { + private: + EpochReclamation* reclamation; + public: + explicit EpochGuard(EpochReclamation* reclamation) + : reclamation(reclamation) + { + reclamation->engage_epoch_user(); + } + + EpochGuard(const EpochGuard&) = delete; + EpochGuard& operator=(const EpochGuard&) = delete; + EpochGuard(EpochGuard&&) = delete; + EpochGuard& operator=(EpochGuard&&) = delete; + + ~EpochGuard() { + reclamation->free_epoch_user(); + } + }; +} // namespace multithreading::utilities::performance \ No newline at end of file diff --git a/multithreading/utilities/include/performance/EpochReclamation.h b/multithreading/utilities/include/performance/EpochReclamation.h new file mode 100644 index 0000000..bec7495 --- /dev/null +++ b/multithreading/utilities/include/performance/EpochReclamation.h @@ -0,0 +1,155 @@ +#pragma once + +#include +#include +#include +#include + +// TODO: Replace MAX_THREADS with vector + +namespace multithreading::utilities::performance { + + constexpr size_t EPOCH_WINDOW_SIZE = 2; + constexpr size_t MAX_THREADS = 128; + constexpr size_t INACTIVE_SIGN = static_cast(-1); + constexpr size_t RETIRE_CLUSTER = 8; + constexpr size_t RECLAMATION_FREQUENCY = 100; + + struct alignas(16) ThreadSlot { + std::atomic local_epoch{ INACTIVE_SIGN }; + std::atomic is_used{ false }; + }; + + template + struct RetireList { + std::vector retire_references; + std::mutex list_mutex; + }; + + template + class EpochReclamation { + private: + std::array thread_slots; + std::array, RETIRE_CLUSTER> retire_lists; + + static thread_local ssize_t my_thread_id; + static thread_local size_t reclamation_counter; + std::atomic last_reclaimed_epoch{0}; + std::atomic next_thread_id{0}; + std::atomic epoch_total{0}; + + size_t get_or_assign_thread_id() { + if (my_thread_id == -1) { + my_thread_id = next_thread_id.fetch_add(1, std::memory_order_relaxed); + + if (std::cmp_greater_equal(my_thread_id , MAX_THREADS)) { + throw std::runtime_error("Too many threads are registered for epoch reclamation"); + } else { + thread_slots[my_thread_id].is_used.store(true, std::memory_order_release); + } + } + + return my_thread_id; + } + + size_t get_oldest_active_epoch() const { + size_t min_epoch = epoch_total.load(std::memory_order_acquire); + + for (const auto& slot : thread_slots) { + if (slot.is_used.load(std::memory_order_acquire)) { + size_t const local_epoch = slot.local_epoch.load(std::memory_order_acquire); + + if (local_epoch != INACTIVE_SIGN) { + min_epoch = std::min(local_epoch, min_epoch); + } + } + } + + return min_epoch; + } + + void try_advance_epoch() { + size_t current_epoch = epoch_total.load(std::memory_order_acquire); + size_t const min_epoch = this->get_oldest_active_epoch(); + + if (min_epoch == current_epoch) { + epoch_total.compare_exchange_strong( + current_epoch, + current_epoch + 1, + std::memory_order_release, + std::memory_order_acquire); + } + } + + void reclaim_obsolete_memory() { + const size_t min_epoch = get_oldest_active_epoch(); + if (min_epoch < EPOCH_WINDOW_SIZE) { + return; + } + + const size_t safe_epoch = min_epoch - EPOCH_WINDOW_SIZE; + const size_t last_reclaimed = last_reclaimed_epoch.load(std::memory_order_acquire); + + for (size_t iterator_epoch = last_reclaimed + 1; iterator_epoch <= safe_epoch; ++iterator_epoch) { + size_t retire_list_index = iterator_epoch % RETIRE_CLUSTER; + RetireList& list = retire_lists[retire_list_index]; + + std::lock_guard list_guard(list.list_mutex); + + for (T* reference : list.retire_references) { + delete reference; + } + + list.retire_references.clear(); + } + + if (safe_epoch > last_reclaimed) { + last_reclaimed_epoch.store(safe_epoch, std::memory_order_release); + } + } + public: + EpochReclamation() = default; + ~EpochReclamation() { + for (auto& retire_list : retire_lists) { + std::lock_guard lock_guard(retire_list.list_mutex); + for (T* reference : retire_list.retire_references) { + delete reference; + } + + retire_list.retire_references.clear(); + } + } + + void engage_epoch_user() { + size_t const thread_id = this->get_or_assign_thread_id(); + const size_t current_epoch = epoch_total.load(std::memory_order_acquire); + + thread_slots[thread_id].local_epoch.store(current_epoch, std::memory_order_release); + } + + void free_epoch_user() { + size_t const thread_id = this->get_or_assign_thread_id(); + thread_slots[thread_id].local_epoch.store(INACTIVE_SIGN, std::memory_order_release); + + if (++reclamation_counter % RECLAMATION_FREQUENCY == 0) { + this->try_advance_epoch(); + this->reclaim_obsolete_memory(); + } + } + + void retire_reference(T* reference) { + size_t const current_epoch = epoch_total.load(std::memory_order_acquire); + size_t retire_list_index = current_epoch % RETIRE_CLUSTER; + RetireList& list = retire_lists[retire_list_index]; + + std::lock_guard list_guard(list.list_mutex); + list.retire_references.push_back(reference); + } + }; + + template + thread_local ssize_t EpochReclamation::my_thread_id = -1; + template + thread_local size_t EpochReclamation::reclamation_counter = 0; +} // namespace multithreading::utilities::performance + From 739b702936dd11b0d0a864c991a118e27578bea3 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Thu, 5 Feb 2026 16:25:35 -0600 Subject: [PATCH 10/19] fix: working benchmarks and lock-free linked list structure --- .../include/linked_list/LockFreeLinkedList.h | 62 ++++++++++++++----- .../include/performance/EpochReclamation.h | 2 - 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/multithreading/structures/include/linked_list/LockFreeLinkedList.h b/multithreading/structures/include/linked_list/LockFreeLinkedList.h index c59431b..3cbc61a 100644 --- a/multithreading/structures/include/linked_list/LockFreeLinkedList.h +++ b/multithreading/structures/include/linked_list/LockFreeLinkedList.h @@ -66,11 +66,14 @@ namespace multithreading::structures::linked_list { LockFreeNode* unmarked_after_next = LockFreeNode::unmark_node(after_next); // Help advance (physical delete) the next_node, then skip. - node->next_node.compare_exchange_weak( + if (node->next_node.compare_exchange_weak( next_node, unmarked_after_next, std::memory_order_release, - std::memory_order_acquire); + std::memory_order_acquire + )) { + epoch_reclamation.retire_reference(unmarked_next); + } // Ignore if we fail. } @@ -264,10 +267,22 @@ namespace multithreading::structures::linked_list { return std::nullopt; } else { // The next is not marked, move to deletion. - T node_value = current_head->value(); LockFreeNode* next_node = current_head->next_node.load(std::memory_order_acquire); - LockFreeNode* marked_next = LockFreeNode::mark_node(next_node); + if (LockFreeNode::is_marked(next_node)) { + LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); + if (head->next_node.compare_exchange_weak( + current_head, + unmarked_next, + std::memory_order_release, + std::memory_order_acquire + )) { + epoch_reclamation.retire_reference(current_head); + } + + continue; + } + LockFreeNode* marked_next = LockFreeNode::mark_node(next_node); if (current_head->next_node.compare_exchange_weak( next_node, marked_next, @@ -275,16 +290,17 @@ namespace multithreading::structures::linked_list { std::memory_order_acquire )) { // The logical deletion was successful - move to the physical deletion. - epoch_reclamation.retire_reference(current_head); + T node_value = current_head->value(); LockFreeNode* unmarked_next = LockFreeNode::unmark_node(next_node); - head->next_node.compare_exchange_weak( + if (head->next_node.compare_exchange_weak( current_head, unmarked_next, std::memory_order_release, - std::memory_order_relaxed); + std::memory_order_relaxed + )) { + epoch_reclamation.retire_reference(current_head); + } - // Ignore the CAS result and finish the deletion. delete nullptr semantics - // are also valid in C++. return node_value; } else { // CAS failed, someone updated the head->next. Retry in next iteration. @@ -316,9 +332,15 @@ namespace multithreading::structures::linked_list { // At this point last_node should already be unmarked, but we try to unmark // one more time for extra safety. LockFreeNode* unmarked_last = LockFreeNode::unmark_node(last_node); - T node_value = unmarked_last->value(); LockFreeNode* after_last = unmarked_last->next_node.load(std::memory_order_acquire); + if (LockFreeNode::is_marked(after_last)) { + // Someone else is deleting this node + this->try_help_advance(before_last, last_node); + continue; + } + LockFreeNode* marked_after = LockFreeNode::mark_node(after_last); + T node_value = unmarked_last->value(); if (!unmarked_last->next_node.compare_exchange_weak( after_last, marked_after, @@ -330,15 +352,19 @@ namespace multithreading::structures::linked_list { // At this point the logical deletion with marking was successful. Now we need // to physically delete the node from the list AND from memory. - epoch_reclamation.retire_reference(unmarked_last); - before_last->next_node.compare_exchange_weak( + if (!before_last->next_node.compare_exchange_weak( unmarked_last, nullptr, std::memory_order_release, - std::memory_order_relaxed); + std::memory_order_relaxed + )) { + epoch_reclamation.retire_reference(unmarked_last); + } + // We ignore all the potential CAS fails because after the logical delete // succeeded our tail and before_last are already in a safe state. The potential - // fails only mean that other threads may have helped advancing the state. + // fails only mean that other threads may have helped advancing the state and + // retiring the node. tail.compare_exchange_weak( unmarked_last, before_last, @@ -385,12 +411,14 @@ namespace multithreading::structures::linked_list { )) { // Successful logical delete. Move to physical delete. The next_node // is unmarked at this point due to the check above. - epoch_reclamation.retire_reference(unmarked_referenced); - before_node->next_node.compare_exchange_weak( + if (before_node->next_node.compare_exchange_weak( unmarked_referenced, next_node, std::memory_order_release, - std::memory_order_relaxed); + std::memory_order_relaxed + )) { + epoch_reclamation.retire_reference(unmarked_referenced); + } // If we popped the last node - try advance the tail to reference the // previous node. diff --git a/multithreading/utilities/include/performance/EpochReclamation.h b/multithreading/utilities/include/performance/EpochReclamation.h index bec7495..07f1eb8 100644 --- a/multithreading/utilities/include/performance/EpochReclamation.h +++ b/multithreading/utilities/include/performance/EpochReclamation.h @@ -5,8 +5,6 @@ #include #include -// TODO: Replace MAX_THREADS with vector - namespace multithreading::utilities::performance { constexpr size_t EPOCH_WINDOW_SIZE = 2; From 17a8e107a4ed31f5a63c6c105ede5d583ce5e70a Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Sun, 8 Feb 2026 23:39:54 -0600 Subject: [PATCH 11/19] chore: reworked benchmark measurements system --- executables/bounded_queue_benchmark.cpp | 47 +++---- executables/linked_list_benchmark.cpp | 51 +++---- executables/unbounded_queue_benchmark.cpp | 43 ++---- multithreading/utilities/CMakeLists.txt | 5 +- .../utilities/include/ConsoleOutput.h | 6 + .../include/benchmark/BenchmarkMeasurer.h | 43 +++++- .../include/benchmark/display/ConsoleOutput.h | 21 +++ .../benchmark/monitor/BenchmarkMeasurement.h | 58 ++++++++ .../benchmark/monitor/BenchmarkMonitor.h | 132 ++++++++++++++++++ .../benchmark/monitor/SpeedMeasurement.h | 51 +++++++ .../src/benchmark/BenchmarkMeasurer.cpp | 32 ----- 11 files changed, 363 insertions(+), 126 deletions(-) create mode 100644 multithreading/utilities/include/ConsoleOutput.h create mode 100644 multithreading/utilities/include/benchmark/display/ConsoleOutput.h create mode 100644 multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h create mode 100644 multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h create mode 100644 multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h delete mode 100644 multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index adb61e7..ec808b9 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -6,51 +6,42 @@ #include #include #include +#include +#include #include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h" -using multithreading::structures::bounded_queue::FGLockBoundedQueue; -using multithreading::structures::bounded_queue::BoundedQueue; -using multithreading::structures::bounded_queue::FGLockBoundedQueue; -using multithreading::structures::bounded_queue::LockFreeBoundedQueue; - -using multithreading::utilities::benchmark::BenchmarkTask; -using multithreading::utilities::benchmark::ProducerConsumerBenchmark; -using multithreading::utilities::benchmark::BenchmarkMeasurer; -using multithreading::utilities::benchmark::BenchmarkRunner; -using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; - -using executables::benchmarks::mcmp::BoundedQueueMCMPBenchmark; -using executables::benchmarks::THREADS_COUNT; -using executables::benchmarks::THREAD_SIZE; -using executables::benchmarks::QUEUE_SIZE; +using namespace multithreading::structures::bounded_queue; +using namespace multithreading::utilities::benchmark; namespace executables { + template static void benchmarkApplication(const std::array>, N>& queues) { - const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, - .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + const BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, + .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const BenchmarkMeasurer matrixMeasurer(matrix); + const auto monitor = std::make_shared>( + RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }) + ) + ); + const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &queue : queues) { std::shared_ptr const benchmark = - std::make_shared(queue.structure); + std::make_shared(queue.structure); std::shared_ptr const runner = - std::make_shared(benchmark); + std::make_shared(benchmark); std::cout << queue.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - for (const auto &[threads_count, thread_size, execution_time] : results) { - std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; - std::cout << "\tExecution time: " << execution_time.count() << "ns\n"; - } - std::cout << '\n'; + display::displayBenchmarkResults(results); } } } // namespace executables @@ -66,11 +57,11 @@ auto main() -> int { ); const std::array queues { BenchmarkTask>( - std::make_shared>(QUEUE_SIZE), + std::make_shared>(executables::benchmarks::QUEUE_SIZE), "Lock-free Lock Queue Benchmark" ), BenchmarkTask>( - std::make_shared>(QUEUE_SIZE), + std::make_shared>(executables::benchmarks::QUEUE_SIZE), "Fine-Grained Lock Queue Benchmark" ), }; diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index c83663c..3f0849d 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -1,56 +1,45 @@ +#include +#include +#include #include #include #include #include #include - -#include -#include -#include - +#include +#include +#include #include -#include -#include +#include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/LinkedListMCMPBenchmark.h" -using multithreading::structures::linked_list::LinkedList; -using multithreading::structures::linked_list::LockFreeLinkedList; -using multithreading::structures::linked_list::FGLockLinkedList; - -using multithreading::utilities::benchmark::BenchmarkTask; -using multithreading::utilities::benchmark::ProducerConsumerBenchmark; -using multithreading::utilities::benchmark::BenchmarkMeasurer; -using multithreading::utilities::benchmark::BenchmarkRunner; -using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; -using executables::benchmarks::mcmp::LinkedListMCMPBenchmark; - -using executables::benchmarks::THREADS_COUNT; -using executables::benchmarks::THREAD_SIZE; +using namespace multithreading::structures::linked_list; +using namespace multithreading::utilities::benchmark; namespace executables { + template static void benchmarkApplication(const std::array>, N>& lists) { - const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, - .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + const BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, + .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const BenchmarkMeasurer matrixMeasurer(matrix); + const auto monitor = std::make_shared>(RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }) + )); + const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &list : lists) { std::shared_ptr const benchmark = - std::make_shared(list.structure); + std::make_shared(list.structure); std::shared_ptr const runner = - std::make_shared(benchmark); + std::make_shared(benchmark); std::cout << list.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - for (const auto &[threads_count, thread_size, execution_time] : results) { - std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; - std::cout << "\tExecution time: " << execution_time.count() << "mics\n"; - } - std::cout << '\n'; + display::displayBenchmarkResults(results); } } } // namespace executables diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index 05ede54..e9fed0a 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -6,51 +6,38 @@ #include #include #include - +#include +#include #include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h" -using multithreading::structures::unbounded_queue::FGLockUnboundedQueue; -using multithreading::structures::unbounded_queue::UnboundedQueue; -using multithreading::structures::unbounded_queue::FGLockUnboundedQueue; -using multithreading::structures::unbounded_queue::LockFreeUnboundedQueue; -using multithreading::structures::unbounded_queue::LockFreeQueueConfig; - -using multithreading::utilities::benchmark::BenchmarkTask; -using multithreading::utilities::benchmark::ProducerConsumerBenchmark; -using multithreading::utilities::benchmark::BenchmarkMeasurer; -using multithreading::utilities::benchmark::BenchmarkRunner; -using multithreading::utilities::benchmark::mcmp::MCMPBenchmarkRunner; - -using executables::benchmarks::mcmp::UnboundedQueueMCMPBenchmark; -using executables::benchmarks::THREADS_COUNT; -using executables::benchmarks::THREAD_SIZE; - +using namespace multithreading::structures::unbounded_queue; +using namespace multithreading::utilities::benchmark; namespace executables { + template static void benchmarkApplication(const std::array>, N>& queues) { - const multithreading::utilities::benchmark::BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ THREAD_SIZE, THREAD_SIZE * 10 }, - .threads_count = std::vector{ THREADS_COUNT, THREADS_COUNT * 2 } + const BenchmarkMatrixDefinition matrix { + .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, + .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const BenchmarkMeasurer matrixMeasurer(matrix); + const auto monitor = std::make_shared>(RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }) + )); + const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &queue : queues) { std::shared_ptr const benchmark = - std::make_shared(queue.structure); + std::make_shared(queue.structure); std::shared_ptr const runner = - std::make_shared(benchmark); + std::make_shared(benchmark); std::cout << queue.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - for (const auto &[threads_count, thread_size, execution_time] : results) { - std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; - std::cout << "\tExecution time: " << execution_time.count() << "mics\n"; - } - std::cout << '\n'; + display::displayBenchmarkResults(results); } } } // namespace executables diff --git a/multithreading/utilities/CMakeLists.txt b/multithreading/utilities/CMakeLists.txt index ea54d1b..4163150 100644 --- a/multithreading/utilities/CMakeLists.txt +++ b/multithreading/utilities/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(utilities - src/benchmark/BenchmarkMeasurer.cpp - src/benchmark/mcmp/MCMPBenchmarkRunner.cpp) + src/benchmark/mcmp/MCMPBenchmarkRunner.cpp + include/benchmark/monitor/SpeedMeasurement.h + include/ConsoleOutput.h) if (BUILD_TESTING) add_executable(utilities_tests tests/main.cpp) diff --git a/multithreading/utilities/include/ConsoleOutput.h b/multithreading/utilities/include/ConsoleOutput.h new file mode 100644 index 0000000..ef8e540 --- /dev/null +++ b/multithreading/utilities/include/ConsoleOutput.h @@ -0,0 +1,6 @@ +#pragma once + + +namespace multithreading::utilities { + +} // namespace multithreading::utilities \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h b/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h index 5822e67..01e1aeb 100644 --- a/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h +++ b/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h @@ -1,27 +1,60 @@ #pragma once +#include + #include "./BenchmarkMatrix.h" #include "./BenchmarkRunner.h" - +#include "monitor/BenchmarkMeasurement.h" +#include "monitor/BenchmarkMonitor.h" namespace multithreading::utilities::benchmark { + template struct alignas(64) BenchmarkResult { size_t threads_count; size_t thread_size; - std::chrono::high_resolution_clock::duration execution_time; + std::tuple...> measurements; }; + template class BenchmarkMeasurer { private: BenchmarkMatrixDefinition benchmark_matrix; + std::shared_ptr> benchmark_monitor; public: - explicit BenchmarkMeasurer(BenchmarkMatrixDefinition matrix) + explicit BenchmarkMeasurer( + BenchmarkMatrixDefinition matrix, + std::shared_ptr> monitor + ) : benchmark_matrix(std::move(matrix)) + , benchmark_monitor(monitor) {} - std::vector measure_benchmark( + [[nodiscard]] std::vector> measure_benchmark( const std::shared_ptr& benchmark_runner - ) const; + ) const { + std::vector> benchmark_results; + + for (const size_t threads_count : benchmark_matrix.threads_count) { + for (const size_t thread_size : benchmark_matrix.per_thread_sizes) { + this->benchmark_monitor->start_monitoring(); + benchmark_runner->run_benchmark_with({ + .threads_count = threads_count, + .thread_size = thread_size + }); + this->benchmark_monitor->stop_monitoring(); + + if (auto measurements = this->benchmark_monitor->get_results(); measurements.has_value()) { + benchmark_results.emplace_back(BenchmarkResult{ + .threads_count = threads_count, + .thread_size = thread_size, + .measurements = measurements.value() + }); + } + } + } + + return benchmark_results; + } }; } // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/display/ConsoleOutput.h b/multithreading/utilities/include/benchmark/display/ConsoleOutput.h new file mode 100644 index 0000000..331bd21 --- /dev/null +++ b/multithreading/utilities/include/benchmark/display/ConsoleOutput.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace multithreading::utilities::benchmark::display { + + template + static void displayBenchmarkResults( + std::vector> results + ) { + for (const auto &[threads_count, thread_size, measurements] : results) { + std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; + std::apply([&](const auto&... measurement) { + ((std::cout << "\t" << measurement.get_verbose() << ": " + << measurement.get_measurement().value() << std::endl), ...); + }, measurements); + } + std::cout << '\n'; + } +} // namespace multithreading::utilities::benchmark::display diff --git a/multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h b/multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h new file mode 100644 index 0000000..317df64 --- /dev/null +++ b/multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h @@ -0,0 +1,58 @@ +#include + +#pragma once + + +namespace multithreading::utilities::benchmark { + + struct BenchmarkMeasurementTemplate { + public: + std::string verbose; + }; + + template + struct BenchmarkMeasurementResult { + private: + BenchmarkMeasurementTemplate reference_template; + T result; + public: + explicit BenchmarkMeasurementResult(BenchmarkMeasurementTemplate measurement, T result) + : reference_template(std::move(measurement)) + , result(result) + {} + + [[nodiscard]] std::string get_verbose() const { + return reference_template.verbose; + } + + [[nodiscard]] std::optional get_measurement() const { + return result; + } + }; + + template + class BenchmarkMeasurement { + protected: + bool is_started; + bool is_measured; + BenchmarkMeasurementTemplate information; + public: + virtual ~BenchmarkMeasurement() = default; + explicit BenchmarkMeasurement(BenchmarkMeasurementTemplate measurement) + : is_started(false) + , is_measured(false) + , information(std::move(measurement)) + {} + + virtual void start() = 0; + virtual void snapshot() = 0; + virtual void stop() = 0; + virtual std::optional> get_result() = 0; + }; + + template + using MeasurementsList = std::tuple...>; + + template + using RefMeasurementsList = std::tuple*...>; +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h b/multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h new file mode 100644 index 0000000..ca0156d --- /dev/null +++ b/multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include +#include + +#include "./BenchmarkMeasurement.h" + +namespace multithreading::utilities::benchmark { + + constexpr size_t DEFAULT_HEARTBEAT_RATE = 10; + + template + class BenchmarkMonitor { + private: + std::tuple*...> measurements; + std::chrono::high_resolution_clock::duration heartbeat; + + std::atomic benchmark_running; + std::thread heartbeat_thread; + mutable std::mutex measurements_mutex; + + void monitor_benchmarks() { + while (benchmark_running.load(std::memory_order_acquire)) { + { + std::scoped_lock lock(measurements_mutex); + std::apply([&](auto*... measurements) { + (measurements->snapshot(), ...); + }, measurements); + } + + std::this_thread::sleep_for(heartbeat); + } + } + public: + BenchmarkMonitor( + const std::tuple*...>& measurements, + const size_t heartbeat + ) + : measurements(measurements) + , heartbeat(std::chrono::milliseconds(heartbeat)) + , benchmark_running(false) + {} + + explicit BenchmarkMonitor( + const std::tuple*...>& measurements + ) + : measurements(measurements) + , heartbeat(std::chrono::milliseconds(DEFAULT_HEARTBEAT_RATE)) + , benchmark_running(false) + {} + + bool start_monitoring() { + bool isRunning = benchmark_running.load(std::memory_order_acquire); + + if (isRunning || heartbeat_thread.joinable()) { + std::cerr << "Could not run BenchmarkMonitor::startMonitoring() as the process " + "is already running\n"; + return false; + } else if (!benchmark_running.compare_exchange_strong( + isRunning, + true, + std::memory_order_release, + std::memory_order_acquire + )) { + std::cerr << "Lost race condition in monitoring process from BenchmarkMonitor\n"; + return false; + } + + { + std::scoped_lock lock(measurements_mutex); + std::apply([&](auto*... measurements) { + (measurements->start(), ...); + }, measurements); + } + + heartbeat_thread = std::thread(&BenchmarkMonitor::monitor_benchmarks, this); + return true; + } + + bool stop_monitoring() { + bool isRunning = benchmark_running.load(std::memory_order_acquire); + if (!isRunning || !heartbeat_thread.joinable()) { + std::cerr << "Could not run BenchmarkMonitor::stopMonitoring() as the process " + "is NOT running or was already stopped\n"; + + return false; + } else if (!benchmark_running.compare_exchange_strong( + isRunning, + false, + std::memory_order_release, + std::memory_order_acquire + )) { + std::cerr << "Lost race condition in stopping monitoring process from BenchmarkMonitor\n"; + + return false; + } + + heartbeat_thread.join(); + { + std::scoped_lock lock(measurements_mutex); + std::apply([&](auto*... measurements) { + (measurements->stop(), ...); + }, measurements); + } + + return true; + } + + std::optional...>> get_results() { + const bool isRunning = benchmark_running.load(std::memory_order_acquire); + if (isRunning || heartbeat_thread.joinable()) { + std::cerr << "Could not run BenchmarkMonitor::get_results() as the benchmark " + "is in process\n"; + + return std::nullopt; + } + + std::tuple...> results = [&]() { + std::scoped_lock lock(measurements_mutex); + return std::apply([](auto*... measurement) { + return std::tuple...>{ + measurement->get_result().value()... + }; + }, measurements); + }(); + + return results; + } + }; +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h b/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h new file mode 100644 index 0000000..4eb1e94 --- /dev/null +++ b/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h @@ -0,0 +1,51 @@ +#pragma once +#include + +#include "BenchmarkMeasurement.h" + +namespace multithreading::utilities::benchmark { + + using DurationType = std::chrono::high_resolution_clock::duration; + + class SpeedMeasurement final + : public BenchmarkMeasurement + { + private: + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + public: + ~SpeedMeasurement() override = default; + explicit SpeedMeasurement(const BenchmarkMeasurementTemplate &measurement) + : BenchmarkMeasurement(measurement) + {} + + void start() override { + this->is_started = true; + this->start_time = std::chrono::high_resolution_clock::now(); + } + + void snapshot() override {} + + void stop() override { + this->is_measured = true; + this->is_started = false; + this->end_time = std::chrono::high_resolution_clock::now(); + } + + std::optional< + BenchmarkMeasurementResult + > get_result() override { + if (!is_measured || is_started) { + return std::nullopt; + } + + return BenchmarkMeasurementResult( + information, + std::chrono::duration_cast( + end_time - start_time + ) + ); + } + }; + +} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp b/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp deleted file mode 100644 index 66a0666..0000000 --- a/multithreading/utilities/src/benchmark/BenchmarkMeasurer.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "../../include/benchmark/BenchmarkMeasurer.h" - - -namespace multithreading::utilities::benchmark { - - std::vector BenchmarkMeasurer::measure_benchmark( - const std::shared_ptr &benchmark_runner - ) const { - std::vector benchmark_results; - - for (const size_t threads_count : benchmark_matrix.threads_count) { - for (const size_t thread_size : benchmark_matrix.per_thread_sizes) { - const auto start_time = std::chrono::high_resolution_clock::now(); - benchmark_runner->run_benchmark_with({ - .threads_count = threads_count, - .thread_size = thread_size - }); - const auto end_time = std::chrono::high_resolution_clock::now(); - - benchmark_results.emplace_back(BenchmarkResult{ - .threads_count = threads_count, - .thread_size = thread_size, - .execution_time = std::chrono::duration_cast( - end_time - start_time - ) - }); - } - } - - return benchmark_results; - } -} // namespace multithreading::utilities::benchmark \ No newline at end of file From 0dcd79634789486fffda4cb3a48a8d4f3876c9fa Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Mon, 9 Feb 2026 00:12:16 -0600 Subject: [PATCH 12/19] chore: emit benchmarking into separate library --- CMakeLists.txt | 3 +- executables/CMakeLists.txt | 6 +-- .../include/mcmp/BoundedQueueMCMPBenchmark.h | 4 +- .../include/mcmp/LinkedListMCMPBenchmark.h | 8 +-- .../mcmp/UnboundedQueueMCMPBenchmark.h | 4 +- executables/bounded_queue_benchmark.cpp | 20 ++++---- executables/linked_list_benchmark.cpp | 20 ++++---- executables/unbounded_queue_benchmark.cpp | 18 +++---- multithreading/CMakeLists.txt | 3 +- multithreading/benchmark/CMakeLists.txt | 13 +++++ .../include}/BenchmarkMatrix.h | 4 +- .../include}/BenchmarkMeasurement.h | 11 ++-- .../include}/BenchmarkMeasurer.h | 11 ++-- .../include}/BenchmarkMonitor.h | 5 +- .../benchmark/include/BenchmarkRunner.h | 14 +++++ .../include}/MultithreadingTask.h | 5 +- .../benchmark/include/SpeedMeasurement.h | 29 +++++++++++ .../include}/mcmp/MCMPBenchmarkRunner.h | 9 ++-- .../include/mcmp}/ProducerConsumerBenchmark.h | 4 +- .../benchmark/include/views/ConsoleOutput.h | 26 ++++++++++ .../benchmark/src/SpeedMeasurement.cpp | 37 ++++++++++++++ .../src}/mcmp/MCMPBenchmarkRunner.cpp | 6 +-- multithreading/benchmark/tests/main.cpp | 9 ++++ .../bounded_queue/LockFreeBoundedQueue.h | 2 +- multithreading/utilities/CMakeLists.txt | 4 +- .../utilities/include/ConsoleOutput.h | 6 --- .../include/benchmark/BenchmarkRunner.h | 17 ------- .../include/benchmark/display/ConsoleOutput.h | 21 -------- .../benchmark/monitor/SpeedMeasurement.h | 51 ------------------- multithreading/utilities/src/placeholder.cpp | 4 ++ 30 files changed, 206 insertions(+), 168 deletions(-) create mode 100644 multithreading/benchmark/CMakeLists.txt rename multithreading/{utilities/include/benchmark => benchmark/include}/BenchmarkMatrix.h (75%) rename multithreading/{utilities/include/benchmark/monitor => benchmark/include}/BenchmarkMeasurement.h (90%) rename multithreading/{utilities/include/benchmark => benchmark/include}/BenchmarkMeasurer.h (90%) rename multithreading/{utilities/include/benchmark/monitor => benchmark/include}/BenchmarkMonitor.h (97%) create mode 100644 multithreading/benchmark/include/BenchmarkRunner.h rename multithreading/{utilities/include/benchmark => benchmark/include}/MultithreadingTask.h (88%) create mode 100644 multithreading/benchmark/include/SpeedMeasurement.h rename multithreading/{utilities/include/benchmark => benchmark/include}/mcmp/MCMPBenchmarkRunner.h (77%) rename multithreading/{utilities/include/benchmark => benchmark/include/mcmp}/ProducerConsumerBenchmark.h (73%) create mode 100644 multithreading/benchmark/include/views/ConsoleOutput.h create mode 100644 multithreading/benchmark/src/SpeedMeasurement.cpp rename multithreading/{utilities/src/benchmark => benchmark/src}/mcmp/MCMPBenchmarkRunner.cpp (84%) create mode 100644 multithreading/benchmark/tests/main.cpp delete mode 100644 multithreading/utilities/include/ConsoleOutput.h delete mode 100644 multithreading/utilities/include/benchmark/BenchmarkRunner.h delete mode 100644 multithreading/utilities/include/benchmark/display/ConsoleOutput.h delete mode 100644 multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h create mode 100644 multithreading/utilities/src/placeholder.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bcc9e05..1846db5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,8 @@ else() endif() # Libraries list -add_library(cxx_setup INTERFACE multithreading/utilities) +add_library(cxx_setup INTERFACE multithreading/utilities + multithreading/benchmark/src/SpeedMeasurement.cpp) # General compiler options if (MSVC) diff --git a/executables/CMakeLists.txt b/executables/CMakeLists.txt index 93023a0..75619a6 100644 --- a/executables/CMakeLists.txt +++ b/executables/CMakeLists.txt @@ -11,6 +11,6 @@ add_executable(linked_list_benchmark benchmarks/src/LinkedListBenchmark.cpp benchmarks/src/mcmp/LinkedListMCMPBenchmark.cpp) -target_link_libraries(unbounded_queue_benchmark PRIVATE utilities structures cxx_setup) -target_link_libraries(bounded_queue_benchmark PRIVATE utilities structures cxx_setup) -target_link_libraries(linked_list_benchmark PRIVATE utilities structures cxx_setup) \ No newline at end of file +target_link_libraries(unbounded_queue_benchmark PRIVATE utilities structures benchmark cxx_setup) +target_link_libraries(bounded_queue_benchmark PRIVATE utilities structures benchmark cxx_setup) +target_link_libraries(linked_list_benchmark PRIVATE utilities structures benchmark cxx_setup) \ No newline at end of file diff --git a/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h b/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h index 11cfdc3..0cd45e1 100644 --- a/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h +++ b/executables/benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h @@ -1,13 +1,13 @@ #pragma once -#include +#include #include "../BoundedQueueBenchmark.h" namespace executables::benchmarks::mcmp { class BoundedQueueMCMPBenchmark final - : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + : public multithreading::benchmark::mcmp::ProducerConsumerBenchmark , BoundedQueueBenchmark { public: diff --git a/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h b/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h index 66679b0..93f60bc 100644 --- a/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h +++ b/executables/benchmarks/include/mcmp/LinkedListMCMPBenchmark.h @@ -1,16 +1,16 @@ #pragma once -#include - +#include +#include #include #include "../LinkedListBenchmark.h" -#include "multithreading/structures/include/linked_list/FGLockLinkedList.h" + namespace executables::benchmarks::mcmp { class LinkedListMCMPBenchmark final - : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + : public multithreading::benchmark::mcmp::ProducerConsumerBenchmark , LinkedListBenchmark { public: diff --git a/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h b/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h index 360660c..40d7a1a 100644 --- a/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h +++ b/executables/benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h @@ -1,13 +1,13 @@ #pragma once -#include +#include #include "../UnboundedQueueBenchmark.h" namespace executables::benchmarks::mcmp { class UnboundedQueueMCMPBenchmark final - : public multithreading::utilities::benchmark::ProducerConsumerBenchmark + : public multithreading::benchmark::mcmp::ProducerConsumerBenchmark , UnboundedQueueBenchmark { public: diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index ec808b9..f391785 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -1,22 +1,22 @@ +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include #include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h" -using namespace multithreading::structures::bounded_queue; -using namespace multithreading::utilities::benchmark; +using namespace multithreading::structures::bounded_queue; +using namespace multithreading::benchmark; namespace executables { @@ -34,14 +34,14 @@ namespace executables { const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &queue : queues) { - std::shared_ptr const benchmark = + std::shared_ptr const benchmark = std::make_shared(queue.structure); std::shared_ptr const runner = std::make_shared(benchmark); std::cout << queue.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - display::displayBenchmarkResults(results); + views::ConsoleOutput::displayBenchmarkResults(results); } } } // namespace executables diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index 3f0849d..e5d4f31 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -1,14 +1,14 @@ +#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include @@ -16,7 +16,7 @@ #include "./benchmarks/include/mcmp/LinkedListMCMPBenchmark.h" using namespace multithreading::structures::linked_list; -using namespace multithreading::utilities::benchmark; +using namespace multithreading::benchmark; namespace executables { @@ -32,14 +32,14 @@ namespace executables { const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &list : lists) { - std::shared_ptr const benchmark = + std::shared_ptr const benchmark = std::make_shared(list.structure); std::shared_ptr const runner = std::make_shared(benchmark); std::cout << list.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - display::displayBenchmarkResults(results); + views::ConsoleOutput::displayBenchmarkResults(results); } } } // namespace executables diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index e9fed0a..32a1d9a 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -1,20 +1,20 @@ +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include #include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h" using namespace multithreading::structures::unbounded_queue; -using namespace multithreading::utilities::benchmark; +using namespace multithreading::benchmark; namespace executables { @@ -30,14 +30,14 @@ namespace executables { const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &queue : queues) { - std::shared_ptr const benchmark = + std::shared_ptr const benchmark = std::make_shared(queue.structure); std::shared_ptr const runner = std::make_shared(benchmark); std::cout << queue.title << "\n"; const auto results = matrixMeasurer.measure_benchmark(runner); - display::displayBenchmarkResults(results); + views::ConsoleOutput::displayBenchmarkResults(results); } } } // namespace executables diff --git a/multithreading/CMakeLists.txt b/multithreading/CMakeLists.txt index 4a42d9b..7579a57 100644 --- a/multithreading/CMakeLists.txt +++ b/multithreading/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(utilities) -add_subdirectory(structures) \ No newline at end of file +add_subdirectory(structures) +add_subdirectory(benchmark) \ No newline at end of file diff --git a/multithreading/benchmark/CMakeLists.txt b/multithreading/benchmark/CMakeLists.txt new file mode 100644 index 0000000..64db5c4 --- /dev/null +++ b/multithreading/benchmark/CMakeLists.txt @@ -0,0 +1,13 @@ +add_library(benchmark + src/SpeedMeasurement.cpp + src/mcmp/MCMPBenchmarkRunner.cpp) + +if (BUILD_TESTING) + add_executable(benchmark_tests tests/main.cpp) + target_link_libraries(benchmark_tests PRIVATE GTest::gtest_main) + include(GoogleTest) + gtest_discover_tests(benchmark_tests) +endif() + +include_directories(include) +target_link_libraries(benchmark PUBLIC cxx_setup) \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/BenchmarkMatrix.h b/multithreading/benchmark/include/BenchmarkMatrix.h similarity index 75% rename from multithreading/utilities/include/benchmark/BenchmarkMatrix.h rename to multithreading/benchmark/include/BenchmarkMatrix.h index f2951c9..98101ea 100644 --- a/multithreading/utilities/include/benchmark/BenchmarkMatrix.h +++ b/multithreading/benchmark/include/BenchmarkMatrix.h @@ -3,7 +3,7 @@ #include -namespace multithreading::utilities::benchmark { +namespace multithreading::benchmark { struct alignas(64) BenchmarkMatrixDefinition { public: @@ -16,4 +16,4 @@ namespace multithreading::utilities::benchmark { size_t threads_count; size_t thread_size; }; -} // namespace multithreading::utilities::benchmark +} // namespace multithreading::benchmark diff --git a/multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h b/multithreading/benchmark/include/BenchmarkMeasurement.h similarity index 90% rename from multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h rename to multithreading/benchmark/include/BenchmarkMeasurement.h index 317df64..9795eec 100644 --- a/multithreading/utilities/include/benchmark/monitor/BenchmarkMeasurement.h +++ b/multithreading/benchmark/include/BenchmarkMeasurement.h @@ -1,12 +1,11 @@ -#include - #pragma once +#include +#include -namespace multithreading::utilities::benchmark { +namespace multithreading::benchmark { - struct BenchmarkMeasurementTemplate { - public: + struct alignas(32) BenchmarkMeasurementTemplate { std::string verbose; }; @@ -55,4 +54,4 @@ namespace multithreading::utilities::benchmark { template using RefMeasurementsList = std::tuple*...>; -} // namespace multithreading::utilities::benchmark \ No newline at end of file +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h b/multithreading/benchmark/include/BenchmarkMeasurer.h similarity index 90% rename from multithreading/utilities/include/benchmark/BenchmarkMeasurer.h rename to multithreading/benchmark/include/BenchmarkMeasurer.h index 01e1aeb..7a6f167 100644 --- a/multithreading/utilities/include/benchmark/BenchmarkMeasurer.h +++ b/multithreading/benchmark/include/BenchmarkMeasurer.h @@ -1,13 +1,12 @@ #pragma once -#include - #include "./BenchmarkMatrix.h" +#include "./BenchmarkMeasurement.h" +#include "./BenchmarkMonitor.h" #include "./BenchmarkRunner.h" -#include "monitor/BenchmarkMeasurement.h" -#include "monitor/BenchmarkMonitor.h" -namespace multithreading::utilities::benchmark { + +namespace multithreading::benchmark { template struct alignas(64) BenchmarkResult { @@ -57,4 +56,4 @@ namespace multithreading::utilities::benchmark { return benchmark_results; } }; -} // namespace multithreading::utilities::benchmark \ No newline at end of file +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h b/multithreading/benchmark/include/BenchmarkMonitor.h similarity index 97% rename from multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h rename to multithreading/benchmark/include/BenchmarkMonitor.h index ca0156d..9a0e63f 100644 --- a/multithreading/utilities/include/benchmark/monitor/BenchmarkMonitor.h +++ b/multithreading/benchmark/include/BenchmarkMonitor.h @@ -7,7 +7,8 @@ #include "./BenchmarkMeasurement.h" -namespace multithreading::utilities::benchmark { + +namespace multithreading::benchmark { constexpr size_t DEFAULT_HEARTBEAT_RATE = 10; @@ -129,4 +130,4 @@ namespace multithreading::utilities::benchmark { return results; } }; -} // namespace multithreading::utilities::benchmark \ No newline at end of file +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/BenchmarkRunner.h b/multithreading/benchmark/include/BenchmarkRunner.h new file mode 100644 index 0000000..9c1f648 --- /dev/null +++ b/multithreading/benchmark/include/BenchmarkRunner.h @@ -0,0 +1,14 @@ +#pragma once + +#include "BenchmarkMatrix.h" + +namespace multithreading::benchmark { + + class BenchmarkRunner { + public: + virtual ~BenchmarkRunner() = default; + BenchmarkRunner() = default; + + virtual void run_benchmark_with(const BenchmarkMatrixItem& item) = 0; + }; +} // namespace multithreading::benchmark diff --git a/multithreading/utilities/include/benchmark/MultithreadingTask.h b/multithreading/benchmark/include/MultithreadingTask.h similarity index 88% rename from multithreading/utilities/include/benchmark/MultithreadingTask.h rename to multithreading/benchmark/include/MultithreadingTask.h index dee1722..078f21d 100644 --- a/multithreading/utilities/include/benchmark/MultithreadingTask.h +++ b/multithreading/benchmark/include/MultithreadingTask.h @@ -4,10 +4,11 @@ #include -namespace multithreading::utilities::benchmark { +namespace multithreading::benchmark { template struct alignas(64) BenchmarkTask { + public: std::string title; std::shared_ptr structure; @@ -27,4 +28,4 @@ namespace multithreading::utilities::benchmark { BenchmarkTask(BenchmarkTask&&) noexcept = default; BenchmarkTask& operator=(BenchmarkTask&&) noexcept = default; }; -} // namespace multithreading::utilities::benchmark \ No newline at end of file +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/SpeedMeasurement.h b/multithreading/benchmark/include/SpeedMeasurement.h new file mode 100644 index 0000000..5aa4d86 --- /dev/null +++ b/multithreading/benchmark/include/SpeedMeasurement.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include "./BenchmarkMeasurement.h" + +namespace multithreading::benchmark { + + using DurationType = std::chrono::high_resolution_clock::duration; + + class SpeedMeasurement final : public BenchmarkMeasurement + { + private: + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + public: + ~SpeedMeasurement() override = default; + explicit SpeedMeasurement(const BenchmarkMeasurementTemplate &measurement); + + void start() override; + + void snapshot() override; + + void stop() override; + + std::optional> get_result() override; + }; +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h b/multithreading/benchmark/include/mcmp/MCMPBenchmarkRunner.h similarity index 77% rename from multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h rename to multithreading/benchmark/include/mcmp/MCMPBenchmarkRunner.h index eaafb0d..85f050a 100644 --- a/multithreading/utilities/include/benchmark/mcmp/MCMPBenchmarkRunner.h +++ b/multithreading/benchmark/include/mcmp/MCMPBenchmarkRunner.h @@ -1,12 +1,13 @@ #pragma once +#include + +#include "./ProducerConsumerBenchmark.h" #include "../BenchmarkMatrix.h" -#include "../ProducerConsumerBenchmark.h" #include "../BenchmarkRunner.h" -#include -namespace multithreading::utilities::benchmark::mcmp { +namespace multithreading::benchmark::mcmp { class MCMPBenchmarkRunner : public BenchmarkRunner { private: @@ -22,4 +23,4 @@ namespace multithreading::utilities::benchmark::mcmp { const BenchmarkMatrixItem& item ) override; }; -} // namespace multithreading::utilities::benchmark::mcmp +} // namespace multithreading::benchmark::mcmp diff --git a/multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h b/multithreading/benchmark/include/mcmp/ProducerConsumerBenchmark.h similarity index 73% rename from multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h rename to multithreading/benchmark/include/mcmp/ProducerConsumerBenchmark.h index af55ae7..8dcb1e1 100644 --- a/multithreading/utilities/include/benchmark/ProducerConsumerBenchmark.h +++ b/multithreading/benchmark/include/mcmp/ProducerConsumerBenchmark.h @@ -3,7 +3,7 @@ #include -namespace multithreading::utilities::benchmark { +namespace multithreading::benchmark::mcmp { class ProducerConsumerBenchmark { public: @@ -12,4 +12,4 @@ namespace multithreading::utilities::benchmark { virtual void producer_routine(std::size_t threadSize) = 0; virtual void consumer_routine(std::size_t threadSize) = 0; }; -} // namespace multithreading::utilities::benchmark \ No newline at end of file +} // namespace multithreading::benchmark::mcmp \ No newline at end of file diff --git a/multithreading/benchmark/include/views/ConsoleOutput.h b/multithreading/benchmark/include/views/ConsoleOutput.h new file mode 100644 index 0000000..13232dd --- /dev/null +++ b/multithreading/benchmark/include/views/ConsoleOutput.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include "../BenchmarkMeasurer.h" + +namespace multithreading::benchmark::views { + + class ConsoleOutput { + public: + template + static void displayBenchmarkResults( + std::vector> results + ) { + for (const auto &[threads_count, thread_size, measurements] : results) { + std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; + std::apply([&](const auto&... measurement) { + ((std::cout << "\t" << measurement.get_verbose() << ": " + << measurement.get_measurement().value() << std::endl), ...); + }, measurements); + } + std::cout << '\n'; + } + }; +} // namespace multithreading::benchmark::views diff --git a/multithreading/benchmark/src/SpeedMeasurement.cpp b/multithreading/benchmark/src/SpeedMeasurement.cpp new file mode 100644 index 0000000..7fc35bd --- /dev/null +++ b/multithreading/benchmark/src/SpeedMeasurement.cpp @@ -0,0 +1,37 @@ +#include "../include/SpeedMeasurement.h" + +namespace multithreading::benchmark { + + SpeedMeasurement::SpeedMeasurement(const BenchmarkMeasurementTemplate &measurement) + : BenchmarkMeasurement(measurement) + {} + + void SpeedMeasurement::start() { + this->is_started = true; + this->start_time = std::chrono::high_resolution_clock::now(); + } + + void SpeedMeasurement::snapshot() { + } + + void SpeedMeasurement::stop() { + this->is_measured = true; + this->is_started = false; + this->end_time = std::chrono::high_resolution_clock::now(); + } + + std::optional< + BenchmarkMeasurementResult + > SpeedMeasurement::get_result() { + if (!is_measured || is_started) { + return std::nullopt; + } + + return BenchmarkMeasurementResult( + information, + std::chrono::duration_cast( + end_time - start_time + ) + ); + } +} // namespace multithreading::benchmark diff --git a/multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp b/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp similarity index 84% rename from multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp rename to multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp index 5ed5313..0415a49 100644 --- a/multithreading/utilities/src/benchmark/mcmp/MCMPBenchmarkRunner.cpp +++ b/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp @@ -1,9 +1,9 @@ -#include "../../../include/benchmark/mcmp/MCMPBenchmarkRunner.h" +#include "../../include/mcmp/MCMPBenchmarkRunner.h" #include -namespace multithreading::utilities::benchmark::mcmp { +namespace multithreading::benchmark::mcmp { MCMPBenchmarkRunner::MCMPBenchmarkRunner( const std::shared_ptr& benchmark @@ -35,4 +35,4 @@ namespace multithreading::utilities::benchmark::mcmp { } } } -} // namespace multithreading::utilities::benchmark::mcmp \ No newline at end of file +} // namespace multithreading::benchmark::mcmp \ No newline at end of file diff --git a/multithreading/benchmark/tests/main.cpp b/multithreading/benchmark/tests/main.cpp new file mode 100644 index 0000000..4aebf54 --- /dev/null +++ b/multithreading/benchmark/tests/main.cpp @@ -0,0 +1,9 @@ +#include + +TEST(Group1, Test1) { + EXPECT_EQ(3, 1 + 2); +} + +TEST(Group1, Test2) { + ASSERT_FLOAT_EQ(3.0F, 1.0F + 2.0F); +} \ No newline at end of file diff --git a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h index 11b49ac..b5ded7b 100644 --- a/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h +++ b/multithreading/structures/include/bounded_queue/LockFreeBoundedQueue.h @@ -1,7 +1,7 @@ #pragma once -#include #include + #include #include diff --git a/multithreading/utilities/CMakeLists.txt b/multithreading/utilities/CMakeLists.txt index 4163150..6d300f7 100644 --- a/multithreading/utilities/CMakeLists.txt +++ b/multithreading/utilities/CMakeLists.txt @@ -1,7 +1,5 @@ add_library(utilities - src/benchmark/mcmp/MCMPBenchmarkRunner.cpp - include/benchmark/monitor/SpeedMeasurement.h - include/ConsoleOutput.h) + src/placeholder.cpp) if (BUILD_TESTING) add_executable(utilities_tests tests/main.cpp) diff --git a/multithreading/utilities/include/ConsoleOutput.h b/multithreading/utilities/include/ConsoleOutput.h deleted file mode 100644 index ef8e540..0000000 --- a/multithreading/utilities/include/ConsoleOutput.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - - -namespace multithreading::utilities { - -} // namespace multithreading::utilities \ No newline at end of file diff --git a/multithreading/utilities/include/benchmark/BenchmarkRunner.h b/multithreading/utilities/include/benchmark/BenchmarkRunner.h deleted file mode 100644 index 12f3ba2..0000000 --- a/multithreading/utilities/include/benchmark/BenchmarkRunner.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "./BenchmarkMatrix.h" - - -namespace multithreading::utilities::benchmark { - - class BenchmarkRunner { - public: - virtual ~BenchmarkRunner() = default; - BenchmarkRunner() = default; - - virtual void run_benchmark_with( - const BenchmarkMatrixItem& item - ) = 0; - }; -} // namespace multithreading::utilities::benchmark diff --git a/multithreading/utilities/include/benchmark/display/ConsoleOutput.h b/multithreading/utilities/include/benchmark/display/ConsoleOutput.h deleted file mode 100644 index 331bd21..0000000 --- a/multithreading/utilities/include/benchmark/display/ConsoleOutput.h +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include -#include - -namespace multithreading::utilities::benchmark::display { - - template - static void displayBenchmarkResults( - std::vector> results - ) { - for (const auto &[threads_count, thread_size, measurements] : results) { - std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; - std::apply([&](const auto&... measurement) { - ((std::cout << "\t" << measurement.get_verbose() << ": " - << measurement.get_measurement().value() << std::endl), ...); - }, measurements); - } - std::cout << '\n'; - } -} // namespace multithreading::utilities::benchmark::display diff --git a/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h b/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h deleted file mode 100644 index 4eb1e94..0000000 --- a/multithreading/utilities/include/benchmark/monitor/SpeedMeasurement.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include - -#include "BenchmarkMeasurement.h" - -namespace multithreading::utilities::benchmark { - - using DurationType = std::chrono::high_resolution_clock::duration; - - class SpeedMeasurement final - : public BenchmarkMeasurement - { - private: - std::chrono::high_resolution_clock::time_point start_time; - std::chrono::high_resolution_clock::time_point end_time; - public: - ~SpeedMeasurement() override = default; - explicit SpeedMeasurement(const BenchmarkMeasurementTemplate &measurement) - : BenchmarkMeasurement(measurement) - {} - - void start() override { - this->is_started = true; - this->start_time = std::chrono::high_resolution_clock::now(); - } - - void snapshot() override {} - - void stop() override { - this->is_measured = true; - this->is_started = false; - this->end_time = std::chrono::high_resolution_clock::now(); - } - - std::optional< - BenchmarkMeasurementResult - > get_result() override { - if (!is_measured || is_started) { - return std::nullopt; - } - - return BenchmarkMeasurementResult( - information, - std::chrono::duration_cast( - end_time - start_time - ) - ); - } - }; - -} // namespace multithreading::utilities::benchmark \ No newline at end of file diff --git a/multithreading/utilities/src/placeholder.cpp b/multithreading/utilities/src/placeholder.cpp new file mode 100644 index 0000000..0f1f29d --- /dev/null +++ b/multithreading/utilities/src/placeholder.cpp @@ -0,0 +1,4 @@ +#include "../include/Application.h" +#include "../include/performance/AlignedField.h" +#include "../include/performance/EpochGuard.h" +#include "../include/performance/EpochReclamation.h" From a15409fc68217a4a133c6dde5c2bf8791477a928 Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Thu, 12 Feb 2026 14:49:18 -0600 Subject: [PATCH 13/19] feat: average memory and peak memory benchmarks --- executables/bounded_queue_benchmark.cpp | 10 ++- executables/linked_list_benchmark.cpp | 15 ++-- executables/unbounded_queue_benchmark.cpp | 13 +++- multithreading/benchmark/CMakeLists.txt | 5 +- .../include/AverageMemoryMeasurement.h | 28 ++++++++ .../benchmark/include/BenchmarkMeasurement.h | 13 +++- .../benchmark/include/PeakMemoryMeasurement.h | 28 ++++++++ .../benchmark/include/SpeedMeasurement.h | 2 +- multithreading/benchmark/include/Units.h | 11 +++ .../benchmark/include/views/ConsoleOutput.h | 2 +- .../src/AverageMemoryMeasurement.cpp | 60 ++++++++++++++++ .../benchmark/src/PeakMemoryMeasurement.cpp | 41 +++++++++++ .../benchmark/src/SpeedMeasurement.cpp | 34 +++++---- multithreading/utilities/CMakeLists.txt | 3 +- .../utilities/include/performance/Memory.h | 69 +++++++++++++++++++ readme.md | 6 +- 16 files changed, 312 insertions(+), 28 deletions(-) create mode 100644 multithreading/benchmark/include/AverageMemoryMeasurement.h create mode 100644 multithreading/benchmark/include/PeakMemoryMeasurement.h create mode 100644 multithreading/benchmark/include/Units.h create mode 100644 multithreading/benchmark/src/AverageMemoryMeasurement.cpp create mode 100644 multithreading/benchmark/src/PeakMemoryMeasurement.cpp create mode 100644 multithreading/utilities/include/performance/Memory.h diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index f391785..ff174bc 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include @@ -26,9 +28,11 @@ namespace executables { .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const auto monitor = std::make_shared>( - RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }) + const auto monitor = std::make_shared>( + RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }), + new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), + new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index e5d4f31..2c493fa 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include #include #include @@ -9,6 +9,9 @@ #include #include #include +#include +#include + #include #include @@ -26,9 +29,13 @@ namespace executables { .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const auto monitor = std::make_shared>(RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }) - )); + const auto monitor = std::make_shared>( + RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }), + new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), + new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) + ) + ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &list : lists) { diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index 32a1d9a..9df8602 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -8,6 +8,9 @@ #include #include #include +#include +#include + #include #include "./benchmarks/include/ThreadConfig.h" @@ -24,9 +27,13 @@ namespace executables { .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } }; - const auto monitor = std::make_shared>(RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }) - )); + const auto monitor = std::make_shared>( + RefMeasurementsList( + new SpeedMeasurement({ .verbose = "Execution Time" }), + new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), + new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) + ) + ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); for (const auto &queue : queues) { diff --git a/multithreading/benchmark/CMakeLists.txt b/multithreading/benchmark/CMakeLists.txt index 64db5c4..930e24d 100644 --- a/multithreading/benchmark/CMakeLists.txt +++ b/multithreading/benchmark/CMakeLists.txt @@ -1,6 +1,9 @@ add_library(benchmark + src/mcmp/MCMPBenchmarkRunner.cpp src/SpeedMeasurement.cpp - src/mcmp/MCMPBenchmarkRunner.cpp) + src/PeakMemoryMeasurement.cpp + src/AverageMemoryMeasurement.cpp + include/Units.h) if (BUILD_TESTING) add_executable(benchmark_tests tests/main.cpp) diff --git a/multithreading/benchmark/include/AverageMemoryMeasurement.h b/multithreading/benchmark/include/AverageMemoryMeasurement.h new file mode 100644 index 0000000..a385f48 --- /dev/null +++ b/multithreading/benchmark/include/AverageMemoryMeasurement.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include "./BenchmarkMeasurement.h" + + +namespace multithreading::benchmark { + + class AverageMemoryMeasurement final : public BenchmarkMeasurement { + private: + std::map snapshots; + size_t baseline; + public: + ~AverageMemoryMeasurement() override = default; + + explicit AverageMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement); + + void start() override; + + void snapshot() override; + + void stop() override; + + std::optional> get_result() override; + }; +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/BenchmarkMeasurement.h b/multithreading/benchmark/include/BenchmarkMeasurement.h index 9795eec..3b97a0a 100644 --- a/multithreading/benchmark/include/BenchmarkMeasurement.h +++ b/multithreading/benchmark/include/BenchmarkMeasurement.h @@ -2,6 +2,7 @@ #include #include +#include namespace multithreading::benchmark { @@ -13,10 +14,16 @@ namespace multithreading::benchmark { struct BenchmarkMeasurementResult { private: BenchmarkMeasurementTemplate reference_template; + std::string unit; T result; public: - explicit BenchmarkMeasurementResult(BenchmarkMeasurementTemplate measurement, T result) + explicit BenchmarkMeasurementResult( + BenchmarkMeasurementTemplate measurement, + T result, + std::string unit + ) : reference_template(std::move(measurement)) + , unit(std::move(unit)) , result(result) {} @@ -27,6 +34,10 @@ namespace multithreading::benchmark { [[nodiscard]] std::optional get_measurement() const { return result; } + + [[nodiscard]] std::string get_unit() const { + return unit; + } }; template diff --git a/multithreading/benchmark/include/PeakMemoryMeasurement.h b/multithreading/benchmark/include/PeakMemoryMeasurement.h new file mode 100644 index 0000000..3654414 --- /dev/null +++ b/multithreading/benchmark/include/PeakMemoryMeasurement.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +#include "./BenchmarkMeasurement.h" + + +namespace multithreading::benchmark { + + class PeakMemoryMeasurement final : public BenchmarkMeasurement { + private: + double baseline; + double peak; + public: + ~PeakMemoryMeasurement() override = default; + + explicit PeakMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement); + + void start() override; + + void snapshot() override; + + void stop() override; + + std::optional> get_result() override; + }; +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/SpeedMeasurement.h b/multithreading/benchmark/include/SpeedMeasurement.h index 5aa4d86..9c3be3a 100644 --- a/multithreading/benchmark/include/SpeedMeasurement.h +++ b/multithreading/benchmark/include/SpeedMeasurement.h @@ -7,7 +7,7 @@ namespace multithreading::benchmark { - using DurationType = std::chrono::high_resolution_clock::duration; + using DurationType = long long; class SpeedMeasurement final : public BenchmarkMeasurement { diff --git a/multithreading/benchmark/include/Units.h b/multithreading/benchmark/include/Units.h new file mode 100644 index 0000000..c4c7626 --- /dev/null +++ b/multithreading/benchmark/include/Units.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace multithreading::benchmark { + + constexpr size_t KiB = 1024; + + constexpr size_t NS_PER_MS = 1000000; + constexpr size_t NS_PER_MICS = 1000; +} // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/views/ConsoleOutput.h b/multithreading/benchmark/include/views/ConsoleOutput.h index 13232dd..c03655f 100644 --- a/multithreading/benchmark/include/views/ConsoleOutput.h +++ b/multithreading/benchmark/include/views/ConsoleOutput.h @@ -17,7 +17,7 @@ namespace multithreading::benchmark::views { std::cout << "Threads (" << threads_count << ") Size (" << thread_size << ")\n"; std::apply([&](const auto&... measurement) { ((std::cout << "\t" << measurement.get_verbose() << ": " - << measurement.get_measurement().value() << std::endl), ...); + << measurement.get_measurement().value() << measurement.get_unit() << std::endl), ...); }, measurements); } std::cout << '\n'; diff --git a/multithreading/benchmark/src/AverageMemoryMeasurement.cpp b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp new file mode 100644 index 0000000..101ee1f --- /dev/null +++ b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp @@ -0,0 +1,60 @@ +#include "../include/AverageMemoryMeasurement.h" + +#include + +#include + +namespace multithreading::benchmark { + + AverageMemoryMeasurement::AverageMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement) + : BenchmarkMeasurement(measurement) + , baseline(0) + {} + + void AverageMemoryMeasurement::start() { + is_started = true; + snapshots.clear(); + baseline = utilities::performance::MemoryMeasurement::currentMemoryUsage(); + } + + void AverageMemoryMeasurement::snapshot() { + if (is_started) { + const size_t memory_usage = + utilities::performance::MemoryMeasurement::currentMemoryUsage(); + const std::chrono::nanoseconds timestamp = + std::chrono::high_resolution_clock::now().time_since_epoch(); + + snapshots[timestamp] = memory_usage; + } + } + + void AverageMemoryMeasurement::stop() { + if (is_started) { + this->snapshot(); + is_measured = true; + is_started = false; + } + } + + std::optional> AverageMemoryMeasurement::get_result() { + if (!is_measured || is_started || snapshots.empty()) { + return std::nullopt; + } + + const size_t snapshots_summary = std::accumulate( + snapshots.begin(), + snapshots.end(), + size_t{0}, + [&]( + const size_t accumulated, + const std::pair& item + ) { + return accumulated + (item.second - baseline); + } + ); + const double snapshots_average = static_cast(snapshots_summary) / + static_cast(snapshots.size()); + + return BenchmarkMeasurementResult(information, snapshots_average, "kB"); + } +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp new file mode 100644 index 0000000..03bc8f7 --- /dev/null +++ b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp @@ -0,0 +1,41 @@ +#include "../include/PeakMemoryMeasurement.h" + +#include + + +namespace multithreading::benchmark { + + PeakMemoryMeasurement::PeakMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement) + : BenchmarkMeasurement(measurement) + , baseline(0) + , peak(0) + {} + + void PeakMemoryMeasurement::start() { + is_started = true; + baseline = utilities::performance::MemoryMeasurement::peakMemoryUsage(); + } + + void PeakMemoryMeasurement::snapshot() { + const double peak_usage = utilities::performance::MemoryMeasurement::peakMemoryUsage(); + const double relative_peak = peak_usage - baseline; + + peak = std::max(peak, relative_peak); + } + + void PeakMemoryMeasurement::stop() { + if (is_started) { + this->snapshot(); + is_measured = true; + is_started = false; + } + } + + std::optional> PeakMemoryMeasurement::get_result() { + if (!is_measured || is_started) { + return std::nullopt; + } + + return BenchmarkMeasurementResult(information, peak, "kB"); + } +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/SpeedMeasurement.cpp b/multithreading/benchmark/src/SpeedMeasurement.cpp index 7fc35bd..a5774cf 100644 --- a/multithreading/benchmark/src/SpeedMeasurement.cpp +++ b/multithreading/benchmark/src/SpeedMeasurement.cpp @@ -1,37 +1,47 @@ #include "../include/SpeedMeasurement.h" +#include "../include/Units.h" + namespace multithreading::benchmark { SpeedMeasurement::SpeedMeasurement(const BenchmarkMeasurementTemplate &measurement) - : BenchmarkMeasurement(measurement) + : BenchmarkMeasurement(measurement) {} void SpeedMeasurement::start() { - this->is_started = true; - this->start_time = std::chrono::high_resolution_clock::now(); + is_started = true; + start_time = std::chrono::high_resolution_clock::now(); } void SpeedMeasurement::snapshot() { } void SpeedMeasurement::stop() { - this->is_measured = true; - this->is_started = false; - this->end_time = std::chrono::high_resolution_clock::now(); + if (!is_started) { + return; + } + + is_measured = true; + is_started = false; + end_time = std::chrono::high_resolution_clock::now(); } - std::optional< - BenchmarkMeasurementResult - > SpeedMeasurement::get_result() { + std::optional> SpeedMeasurement::get_result() { if (!is_measured || is_started) { return std::nullopt; } + const auto duration = std::chrono::duration_cast( + end_time - start_time + ); + const DurationType duration_ms = std::round( + static_cast(duration.count()) / static_cast(NS_PER_MICS) + ); + return BenchmarkMeasurementResult( information, - std::chrono::duration_cast( - end_time - start_time - ) + duration_ms, + "mics" ); } } // namespace multithreading::benchmark diff --git a/multithreading/utilities/CMakeLists.txt b/multithreading/utilities/CMakeLists.txt index 6d300f7..300802f 100644 --- a/multithreading/utilities/CMakeLists.txt +++ b/multithreading/utilities/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(utilities - src/placeholder.cpp) + src/placeholder.cpp + include/performance/Memory.h) if (BUILD_TESTING) add_executable(utilities_tests tests/main.cpp) diff --git a/multithreading/utilities/include/performance/Memory.h b/multithreading/utilities/include/performance/Memory.h new file mode 100644 index 0000000..cc9e55f --- /dev/null +++ b/multithreading/utilities/include/performance/Memory.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef __linux__ +#include +#include +#elif __APPLE__ +#include +#endif + +namespace multithreading::utilities::performance { + + constexpr size_t KiB = 1024; + + class MemoryMeasurement { + public: + static double peakMemoryUsage() { + struct rusage usage; + getrusage(RUSAGE_SELF, &usage); + double memory_usage = 0; +#ifdef __APPLE__ + memory_usage = static_cast(usage.ru_maxrss) / static_cast(KiB); +#else + memory_usage = usage.ru_maxrss; +#endif + return memory_usage; + } + + static size_t currentMemoryUsage() { +#ifdef __linux__ + std::ifstream status_stream("/proc/self/status", ios_base::in); + std::string line; + while (std::getline(status_stream, line)) { + if (line.find("VmRSS:") != std::string::npos) { + long rss; + sscanf(line.c_str(), "VmRSS: %ld kB", &rss); + return static_cast(rss); + } + } + + std::cerr << "MemoryMeasurement::currentMemoryUsage() failed " + "to find the memory state in the /proc/self/status file\n"; + return 0; +# elif __APPLE__ + struct mach_task_basic_info basic_info; + mach_msg_type_number_t msg_size = MACH_TASK_BASIC_INFO_COUNT; + kern_return_t kerr = task_info( + mach_task_self(), + MACH_TASK_BASIC_INFO, + (task_info_t)&basic_info, + &msg_size + ); + if (kerr == KERN_SUCCESS) { + return basic_info.resident_size / KiB; + } + + std::cerr << "MemoryMeasurement::currentMemoryUsage() failed " + "to retrieve the memory state: " << kerr << '\n'; + return 0; +#else +#error "The platform is not supported for memory usage identification" +#endif + } + }; +} // namespace multithreading::utilities::performance \ No newline at end of file diff --git a/readme.md b/readme.md index a2db70e..cf71a23 100644 --- a/readme.md +++ b/readme.md @@ -51,4 +51,8 @@ The example of a working project with this template structure ### AI Warships Game Warships game against AI with skills, game saves, and progression. -[![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=TrofimovVladislav5&theme=tokyonight&height=200&repo=cpp-warships)](https://github.com/TrofimovVladislav5/cpp-warships) \ No newline at end of file +[![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=TrofimovVladislav5&theme=tokyonight&height=200&repo=cpp-warships)](https://github.com/TrofimovVladislav5/cpp-warships) + +TODO: +- [ ] Write structures tests +- [ ] Write benc \ No newline at end of file From ab8a3ebf9c3084b8594d87deebfffab3cd7157ba Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Thu, 19 Feb 2026 13:14:03 -0600 Subject: [PATCH 14/19] fix: used snapshots to measure average memory usage instead of measuring by its peak --- multithreading/benchmark/CMakeLists.txt | 3 +-- .../benchmark/include/PeakMemoryMeasurement.h | 4 +++- .../benchmark/src/PeakMemoryMeasurement.cpp | 20 +++++++++++++------ .../benchmark/src/SpeedMeasurement.cpp | 4 ++-- .../{benchmark => utilities}/include/Units.h | 4 ++-- .../utilities/include/performance/Memory.h | 8 +++----- 6 files changed, 25 insertions(+), 18 deletions(-) rename multithreading/{benchmark => utilities}/include/Units.h (66%) diff --git a/multithreading/benchmark/CMakeLists.txt b/multithreading/benchmark/CMakeLists.txt index 930e24d..31d51c8 100644 --- a/multithreading/benchmark/CMakeLists.txt +++ b/multithreading/benchmark/CMakeLists.txt @@ -2,8 +2,7 @@ add_library(benchmark src/mcmp/MCMPBenchmarkRunner.cpp src/SpeedMeasurement.cpp src/PeakMemoryMeasurement.cpp - src/AverageMemoryMeasurement.cpp - include/Units.h) + src/AverageMemoryMeasurement.cpp) if (BUILD_TESTING) add_executable(benchmark_tests tests/main.cpp) diff --git a/multithreading/benchmark/include/PeakMemoryMeasurement.h b/multithreading/benchmark/include/PeakMemoryMeasurement.h index 3654414..cd76503 100644 --- a/multithreading/benchmark/include/PeakMemoryMeasurement.h +++ b/multithreading/benchmark/include/PeakMemoryMeasurement.h @@ -11,7 +11,9 @@ namespace multithreading::benchmark { class PeakMemoryMeasurement final : public BenchmarkMeasurement { private: double baseline; - double peak; + std::vector snapshots; + + double recorded_result; public: ~PeakMemoryMeasurement() override = default; diff --git a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp index 03bc8f7..b93752a 100644 --- a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp +++ b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp @@ -8,19 +8,20 @@ namespace multithreading::benchmark { PeakMemoryMeasurement::PeakMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement) : BenchmarkMeasurement(measurement) , baseline(0) - , peak(0) + , snapshots({}) + , recorded_result(0) {} void PeakMemoryMeasurement::start() { is_started = true; - baseline = utilities::performance::MemoryMeasurement::peakMemoryUsage(); + baseline = utilities::performance::MemoryMeasurement::currentMemoryUsage(); + snapshots.clear(); } void PeakMemoryMeasurement::snapshot() { - const double peak_usage = utilities::performance::MemoryMeasurement::peakMemoryUsage(); - const double relative_peak = peak_usage - baseline; + const double current_usage = utilities::performance::MemoryMeasurement::currentMemoryUsage(); - peak = std::max(peak, relative_peak); + snapshots.emplace_back(current_usage); } void PeakMemoryMeasurement::stop() { @@ -28,6 +29,13 @@ namespace multithreading::benchmark { this->snapshot(); is_measured = true; is_started = false; + + const double highest_snapshot = *std::ranges::max_element( + snapshots.begin(), + snapshots.end(), + std::ranges::less{}, + std::identity{}); + recorded_result = std::abs(highest_snapshot - baseline); } } @@ -36,6 +44,6 @@ namespace multithreading::benchmark { return std::nullopt; } - return BenchmarkMeasurementResult(information, peak, "kB"); + return BenchmarkMeasurementResult(information, recorded_result, "kB"); } } // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/SpeedMeasurement.cpp b/multithreading/benchmark/src/SpeedMeasurement.cpp index a5774cf..7f965cc 100644 --- a/multithreading/benchmark/src/SpeedMeasurement.cpp +++ b/multithreading/benchmark/src/SpeedMeasurement.cpp @@ -1,6 +1,6 @@ #include "../include/SpeedMeasurement.h" -#include "../include/Units.h" +#include namespace multithreading::benchmark { @@ -35,7 +35,7 @@ namespace multithreading::benchmark { end_time - start_time ); const DurationType duration_ms = std::round( - static_cast(duration.count()) / static_cast(NS_PER_MICS) + static_cast(duration.count()) / static_cast(utilities::NS_PER_MICS) ); return BenchmarkMeasurementResult( diff --git a/multithreading/benchmark/include/Units.h b/multithreading/utilities/include/Units.h similarity index 66% rename from multithreading/benchmark/include/Units.h rename to multithreading/utilities/include/Units.h index c4c7626..d64a1cf 100644 --- a/multithreading/benchmark/include/Units.h +++ b/multithreading/utilities/include/Units.h @@ -2,10 +2,10 @@ #include -namespace multithreading::benchmark { +namespace multithreading::utilities { constexpr size_t KiB = 1024; constexpr size_t NS_PER_MS = 1000000; constexpr size_t NS_PER_MICS = 1000; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::utilities \ No newline at end of file diff --git a/multithreading/utilities/include/performance/Memory.h b/multithreading/utilities/include/performance/Memory.h index cc9e55f..88b0477 100644 --- a/multithreading/utilities/include/performance/Memory.h +++ b/multithreading/utilities/include/performance/Memory.h @@ -1,7 +1,7 @@ #pragma once +#include "../Units.h" #include -#include #include #include @@ -14,8 +14,6 @@ namespace multithreading::utilities::performance { - constexpr size_t KiB = 1024; - class MemoryMeasurement { public: static double peakMemoryUsage() { @@ -30,7 +28,7 @@ namespace multithreading::utilities::performance { return memory_usage; } - static size_t currentMemoryUsage() { + static double currentMemoryUsage() { #ifdef __linux__ std::ifstream status_stream("/proc/self/status", ios_base::in); std::string line; @@ -55,7 +53,7 @@ namespace multithreading::utilities::performance { &msg_size ); if (kerr == KERN_SUCCESS) { - return basic_info.resident_size / KiB; + return static_cast(basic_info.resident_size) / static_cast(KiB); } std::cerr << "MemoryMeasurement::currentMemoryUsage() failed " From fb9645e4174f179000186b5f252df758d2748caf Mon Sep 17 00:00:00 2001 From: isoldpower Date: Mon, 23 Feb 2026 17:12:20 -0600 Subject: [PATCH 15/19] fix: include missing headers for g++ linkage --- multithreading/benchmark/include/AverageMemoryMeasurement.h | 3 ++- multithreading/benchmark/include/BenchmarkMeasurer.h | 3 ++- multithreading/benchmark/include/BenchmarkMonitor.h | 3 ++- multithreading/benchmark/include/PeakMemoryMeasurement.h | 4 ++-- multithreading/benchmark/src/AverageMemoryMeasurement.cpp | 1 + multithreading/benchmark/src/PeakMemoryMeasurement.cpp | 5 +++-- multithreading/benchmark/src/SpeedMeasurement.cpp | 1 + multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp | 6 +++--- .../structures/include/linked_list/FGLockLinkedList.h | 3 ++- multithreading/utilities/include/performance/Memory.h | 4 ++-- 10 files changed, 20 insertions(+), 13 deletions(-) diff --git a/multithreading/benchmark/include/AverageMemoryMeasurement.h b/multithreading/benchmark/include/AverageMemoryMeasurement.h index a385f48..934f134 100644 --- a/multithreading/benchmark/include/AverageMemoryMeasurement.h +++ b/multithreading/benchmark/include/AverageMemoryMeasurement.h @@ -2,6 +2,7 @@ #include #include +#include #include "./BenchmarkMeasurement.h" @@ -25,4 +26,4 @@ namespace multithreading::benchmark { std::optional> get_result() override; }; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/include/BenchmarkMeasurer.h b/multithreading/benchmark/include/BenchmarkMeasurer.h index 7a6f167..2438493 100644 --- a/multithreading/benchmark/include/BenchmarkMeasurer.h +++ b/multithreading/benchmark/include/BenchmarkMeasurer.h @@ -5,6 +5,7 @@ #include "./BenchmarkMonitor.h" #include "./BenchmarkRunner.h" +#include namespace multithreading::benchmark { @@ -56,4 +57,4 @@ namespace multithreading::benchmark { return benchmark_results; } }; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/include/BenchmarkMonitor.h b/multithreading/benchmark/include/BenchmarkMonitor.h index 9a0e63f..4d5ef8e 100644 --- a/multithreading/benchmark/include/BenchmarkMonitor.h +++ b/multithreading/benchmark/include/BenchmarkMonitor.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "./BenchmarkMeasurement.h" @@ -130,4 +131,4 @@ namespace multithreading::benchmark { return results; } }; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/include/PeakMemoryMeasurement.h b/multithreading/benchmark/include/PeakMemoryMeasurement.h index cd76503..602724e 100644 --- a/multithreading/benchmark/include/PeakMemoryMeasurement.h +++ b/multithreading/benchmark/include/PeakMemoryMeasurement.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include "./BenchmarkMeasurement.h" @@ -27,4 +27,4 @@ namespace multithreading::benchmark { std::optional> get_result() override; }; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/AverageMemoryMeasurement.cpp b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp index 101ee1f..2cca06b 100644 --- a/multithreading/benchmark/src/AverageMemoryMeasurement.cpp +++ b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp @@ -2,6 +2,7 @@ #include +#include #include namespace multithreading::benchmark { diff --git a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp index b93752a..972fbe7 100644 --- a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp +++ b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp @@ -1,5 +1,6 @@ #include "../include/PeakMemoryMeasurement.h" +#include #include @@ -31,8 +32,8 @@ namespace multithreading::benchmark { is_started = false; const double highest_snapshot = *std::ranges::max_element( - snapshots.begin(), - snapshots.end(), + this->snapshots.begin(), + this->snapshots.end(), std::ranges::less{}, std::identity{}); recorded_result = std::abs(highest_snapshot - baseline); diff --git a/multithreading/benchmark/src/SpeedMeasurement.cpp b/multithreading/benchmark/src/SpeedMeasurement.cpp index 7f965cc..8f30896 100644 --- a/multithreading/benchmark/src/SpeedMeasurement.cpp +++ b/multithreading/benchmark/src/SpeedMeasurement.cpp @@ -1,5 +1,6 @@ #include "../include/SpeedMeasurement.h" +#include #include namespace multithreading::benchmark { diff --git a/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp b/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp index 0415a49..ead6a7e 100644 --- a/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp +++ b/multithreading/benchmark/src/mcmp/MCMPBenchmarkRunner.cpp @@ -1,7 +1,7 @@ #include "../../include/mcmp/MCMPBenchmarkRunner.h" #include - +#include namespace multithreading::benchmark::mcmp { @@ -14,7 +14,7 @@ namespace multithreading::benchmark::mcmp { void MCMPBenchmarkRunner::run_benchmark_with( const BenchmarkMatrixItem& item ) { - const auto half_threads = static_cast(floor(item.threads_count / 2)); + const auto half_threads = static_cast(std::floor(item.threads_count / 2)); std::vector threads {}; threads.reserve(item.threads_count); @@ -35,4 +35,4 @@ namespace multithreading::benchmark::mcmp { } } } -} // namespace multithreading::benchmark::mcmp \ No newline at end of file +} // namespace multithreading::benchmark::mcmp diff --git a/multithreading/structures/include/linked_list/FGLockLinkedList.h b/multithreading/structures/include/linked_list/FGLockLinkedList.h index 1cd263e..14e01b0 100644 --- a/multithreading/structures/include/linked_list/FGLockLinkedList.h +++ b/multithreading/structures/include/linked_list/FGLockLinkedList.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "./LinkedList.h" @@ -386,4 +387,4 @@ namespace multithreading::structures::linked_list { return impl->contains(value); } }; -} // namespace multithreading::structures::linked_list \ No newline at end of file +} // namespace multithreading::structures::linked_list diff --git a/multithreading/utilities/include/performance/Memory.h b/multithreading/utilities/include/performance/Memory.h index 88b0477..8586066 100644 --- a/multithreading/utilities/include/performance/Memory.h +++ b/multithreading/utilities/include/performance/Memory.h @@ -30,7 +30,7 @@ namespace multithreading::utilities::performance { static double currentMemoryUsage() { #ifdef __linux__ - std::ifstream status_stream("/proc/self/status", ios_base::in); + std::ifstream status_stream("/proc/self/status", std::ios_base::in); std::string line; while (std::getline(status_stream, line)) { if (line.find("VmRSS:") != std::string::npos) { @@ -64,4 +64,4 @@ namespace multithreading::utilities::performance { #endif } }; -} // namespace multithreading::utilities::performance \ No newline at end of file +} // namespace multithreading::utilities::performance From e4247788e9d5c31d513f829245a124013247866c Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Mon, 23 Feb 2026 17:53:15 -0600 Subject: [PATCH 16/19] fix: linked_list memory leaks fix attempt --- executables/linked_list_benchmark.cpp | 7 ++++--- .../benchmark/include/BenchmarkMeasurement.h | 2 +- .../benchmark/include/BenchmarkMonitor.h | 18 +++++++++--------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index 2c493fa..b3c0124 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -14,6 +14,7 @@ #include #include +#include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/LinkedListMCMPBenchmark.h" @@ -31,9 +32,9 @@ namespace executables { }; const auto monitor = std::make_shared>( RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }), - new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), - new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); diff --git a/multithreading/benchmark/include/BenchmarkMeasurement.h b/multithreading/benchmark/include/BenchmarkMeasurement.h index 3b97a0a..34910ac 100644 --- a/multithreading/benchmark/include/BenchmarkMeasurement.h +++ b/multithreading/benchmark/include/BenchmarkMeasurement.h @@ -64,5 +64,5 @@ namespace multithreading::benchmark { using MeasurementsList = std::tuple...>; template - using RefMeasurementsList = std::tuple*...>; + using RefMeasurementsList = std::tuple>...>; } // namespace multithreading::benchmark \ No newline at end of file diff --git a/multithreading/benchmark/include/BenchmarkMonitor.h b/multithreading/benchmark/include/BenchmarkMonitor.h index 4d5ef8e..f614b5e 100644 --- a/multithreading/benchmark/include/BenchmarkMonitor.h +++ b/multithreading/benchmark/include/BenchmarkMonitor.h @@ -16,7 +16,7 @@ namespace multithreading::benchmark { template class BenchmarkMonitor { private: - std::tuple*...> measurements; + std::tuple>...> measurements; std::chrono::high_resolution_clock::duration heartbeat; std::atomic benchmark_running; @@ -27,7 +27,7 @@ namespace multithreading::benchmark { while (benchmark_running.load(std::memory_order_acquire)) { { std::scoped_lock lock(measurements_mutex); - std::apply([&](auto*... measurements) { + std::apply([&](auto&... measurements) { (measurements->snapshot(), ...); }, measurements); } @@ -37,18 +37,18 @@ namespace multithreading::benchmark { } public: BenchmarkMonitor( - const std::tuple*...>& measurements, + std::tuple>...> measurements, const size_t heartbeat ) - : measurements(measurements) + : measurements(std::move(measurements)) , heartbeat(std::chrono::milliseconds(heartbeat)) , benchmark_running(false) {} explicit BenchmarkMonitor( - const std::tuple*...>& measurements + std::tuple>...> measurements ) - : measurements(measurements) + : measurements(std::move(measurements)) , heartbeat(std::chrono::milliseconds(DEFAULT_HEARTBEAT_RATE)) , benchmark_running(false) {} @@ -72,7 +72,7 @@ namespace multithreading::benchmark { { std::scoped_lock lock(measurements_mutex); - std::apply([&](auto*... measurements) { + std::apply([&](auto&... measurements) { (measurements->start(), ...); }, measurements); } @@ -102,7 +102,7 @@ namespace multithreading::benchmark { heartbeat_thread.join(); { std::scoped_lock lock(measurements_mutex); - std::apply([&](auto*... measurements) { + std::apply([&](auto&... measurements) { (measurements->stop(), ...); }, measurements); } @@ -121,7 +121,7 @@ namespace multithreading::benchmark { std::tuple...> results = [&]() { std::scoped_lock lock(measurements_mutex); - return std::apply([](auto*... measurement) { + return std::apply([](auto&... measurement) { return std::tuple...>{ measurement->get_result().value()... }; From bf92070aa26a524e891503296832dcd6b1bf350f Mon Sep 17 00:00:00 2001 From: Nikita Lapin Date: Mon, 23 Feb 2026 18:01:47 -0600 Subject: [PATCH 17/19] fix: memory leaks fixed --- executables/bounded_queue_benchmark.cpp | 7 ++++--- executables/unbounded_queue_benchmark.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index ff174bc..026225d 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -12,6 +12,7 @@ #include #include +#include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h" @@ -30,9 +31,9 @@ namespace executables { }; const auto monitor = std::make_shared>( RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }), - new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), - new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index 9df8602..af44bbf 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -12,6 +12,7 @@ #include #include +#include #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h" @@ -29,9 +30,9 @@ namespace executables { }; const auto monitor = std::make_shared>( RefMeasurementsList( - new SpeedMeasurement({ .verbose = "Execution Time" }), - new PeakMemoryMeasurement({ .verbose = "Peak Memory Overhead" }), - new AverageMemoryMeasurement({ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); From 46ecca43a48d2d5bdd088fdd8d11cae76150b8b3 Mon Sep 17 00:00:00 2001 From: isoldpower Date: Mon, 23 Feb 2026 20:05:59 -0600 Subject: [PATCH 18/19] fix: clang-tidy build issues fixed --- .clang-tidy | 38 +++++++++++++++++-- .github/workflows/multi-platform.yaml | 2 +- executables/bounded_queue_benchmark.cpp | 21 +++++----- executables/linked_list_benchmark.cpp | 22 ++++++----- executables/unbounded_queue_benchmark.cpp | 20 +++++----- .../benchmark/include/BenchmarkMeasurement.h | 4 +- .../src/AverageMemoryMeasurement.cpp | 13 ++++--- .../benchmark/src/PeakMemoryMeasurement.cpp | 8 +++- .../benchmark/src/SpeedMeasurement.cpp | 9 ++--- 9 files changed, 91 insertions(+), 46 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 748c212..b342dfa 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,5 +1,37 @@ -Checks: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace,-portability-template-virtual-member-function,-altera-id-dependent-backward-branch' -WarningsAsErrors: 'clang-diagnostic-*,clang-analyzer-*,cppcoreguidelines-*,modernize-*,*,-clang-diagnostic-error,-llvmlibc-*,-llvm-include-order,-misc-include-cleaner,-fuchsia-trailing-return,-modernize-use-trailing-return-type,-altera-unroll-loops,-misc-use-anonymous-namespace,-portability-template-virtual-member-function,-altera-id-dependent-backward-branch' +Checks: > + clang-diagnostic-*, + clang-analyzer-*, + cppcoreguidelines-*, + modernize-*, + *, + -clang-diagnostic-error, + -llvmlibc-*, + -llvm-include-order, + -misc-include-cleaner, + -fuchsia-trailing-return, + -modernize-use-trailing-return-type, + -altera-unroll-loops, + -misc-use-anonymous-namespace, + -portability-template-virtual-member-function, + -altera-id-dependent-backward-branch, + -boost-use-ranges +WarningsAsErrors: > + clang-diagnostic-*, + clang-analyzer-*, + cppcoreguidelines-*, + modernize-*, + *, + -clang-diagnostic-error, + -llvmlibc-*, + -llvm-include-order, + -misc-include-cleaner, + -fuchsia-trailing-return, + -modernize-use-trailing-return-type, + -altera-unroll-loops, + -misc-use-anonymous-namespace, + -portability-template-virtual-member-function, + -altera-id-dependent-backward-branch, + -boost-use-ranges HeaderFilterRegex: '^(?!.*/build/).*' FormatStyle: google CheckOptions: @@ -30,4 +62,4 @@ CheckOptions: - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros - value: 'NULL' \ No newline at end of file + value: 'NULL' diff --git a/.github/workflows/multi-platform.yaml b/.github/workflows/multi-platform.yaml index df9c1fe..1cf5f6d 100644 --- a/.github/workflows/multi-platform.yaml +++ b/.github/workflows/multi-platform.yaml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, macos-latest] build_type: [Release] c_compiler: [gcc, clang, cl] include: diff --git a/executables/bounded_queue_benchmark.cpp b/executables/bounded_queue_benchmark.cpp index 026225d..0928c44 100644 --- a/executables/bounded_queue_benchmark.cpp +++ b/executables/bounded_queue_benchmark.cpp @@ -17,23 +17,24 @@ #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/BoundedQueueMCMPBenchmark.h" - +// NOLINTBEGIN(google-build-using-namespace) using namespace multithreading::structures::bounded_queue; using namespace multithreading::benchmark; +// NOLINTEND(google-build-using-namespace) namespace executables { template static void benchmarkApplication(const std::array>, N>& queues) { const BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, - .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } + .per_thread_sizes = std::vector{ {benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10}, std::allocator{} }, + .threads_count = std::vector{ {benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2}, std::allocator{} } }; const auto monitor = std::make_shared>( RefMeasurementsList( - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Execution Time", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Peak Memory Overhead", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Average Memory Usage", std::allocator{}} }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); @@ -54,8 +55,8 @@ namespace executables { auto main() -> int { multithreading::utilities::Application benchmarkApplication( multithreading::utilities::ApplicationInfo{ - .appName="Unbounded Queue Benchmark", - .appVersion="1.0.0", + .appName=std::string{"Unbounded Queue Benchmark", std::allocator{}}, + .appVersion=std::string{"1.0.0", std::allocator{}}, .beforeTask = std::nullopt, .afterTask = std::nullopt } @@ -63,11 +64,11 @@ auto main() -> int { const std::array queues { BenchmarkTask>( std::make_shared>(executables::benchmarks::QUEUE_SIZE), - "Lock-free Lock Queue Benchmark" + std::string{"Lock-free Lock Queue Benchmark", std::allocator{}} ), BenchmarkTask>( std::make_shared>(executables::benchmarks::QUEUE_SIZE), - "Fine-Grained Lock Queue Benchmark" + std::string{"Fine-Grained Lock Queue Benchmark", std::allocator{}} ), }; diff --git a/executables/linked_list_benchmark.cpp b/executables/linked_list_benchmark.cpp index b3c0124..dabf41b 100644 --- a/executables/linked_list_benchmark.cpp +++ b/executables/linked_list_benchmark.cpp @@ -19,22 +19,24 @@ #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/LinkedListMCMPBenchmark.h" +// NOLINTBEGIN(google-build-using-namespace) using namespace multithreading::structures::linked_list; using namespace multithreading::benchmark; +// NOLINTEND(google-build-using-namespace) namespace executables { template static void benchmarkApplication(const std::array>, N>& lists) { const BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, - .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } + .per_thread_sizes = std::vector{ {benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10}, std::allocator{} }, + .threads_count = std::vector{ {benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2}, std::allocator{} } }; const auto monitor = std::make_shared>( RefMeasurementsList( - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Execution Time", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Peak Memory Overhead", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Average Memory Usage", std::allocator{}} }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); @@ -55,8 +57,8 @@ namespace executables { auto main() -> int { multithreading::utilities::Application benchmarkApplication( multithreading::utilities::ApplicationInfo{ - .appName="Linked List Benchmark", - .appVersion="1.0.0", + .appName=std::string{"Linked List Benchmark", std::allocator{}}, + .appVersion=std::string{"1.0.0", std::allocator{}}, .beforeTask = std::nullopt, .afterTask = std::nullopt } @@ -65,11 +67,11 @@ auto main() -> int { const std::array lists { BenchmarkTask>( std::make_shared>(), - "Lock-free Linked List Benchmark" + std::string{"Lock-free Linked List Benchmark", std::allocator{}} ), BenchmarkTask>( std::make_shared>(), - "Fine-Grained Linked List Benchmark" + std::string{"Fine-Grained Linked List Benchmark", std::allocator{}} ), }; @@ -85,4 +87,4 @@ auto main() -> int { return -1; } -} \ No newline at end of file +} diff --git a/executables/unbounded_queue_benchmark.cpp b/executables/unbounded_queue_benchmark.cpp index af44bbf..a58b143 100644 --- a/executables/unbounded_queue_benchmark.cpp +++ b/executables/unbounded_queue_benchmark.cpp @@ -17,22 +17,24 @@ #include "./benchmarks/include/ThreadConfig.h" #include "./benchmarks/include/mcmp/UnboundedQueueMCMPBenchmark.h" +// NOLINTBEGIN(google-build-using-namespace) using namespace multithreading::structures::unbounded_queue; using namespace multithreading::benchmark; +// NOLINTEND(google-build-using-namespace) namespace executables { template static void benchmarkApplication(const std::array>, N>& queues) { const BenchmarkMatrixDefinition matrix { - .per_thread_sizes = std::vector{ benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10 }, - .threads_count = std::vector{ benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2 } + .per_thread_sizes = std::vector{ {benchmarks::THREAD_SIZE, benchmarks::THREAD_SIZE * 10}, std::allocator{} }, + .threads_count = std::vector{ {benchmarks::THREADS_COUNT, benchmarks::THREADS_COUNT * 2}, std::allocator{} } }; const auto monitor = std::make_shared>( RefMeasurementsList( - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Execution Time" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Peak Memory Overhead" }), - std::make_unique(BenchmarkMeasurementTemplate{ .verbose = "Average Memory Usage" }) + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Execution Time", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Peak Memory Overhead", std::allocator{}} }), + std::make_unique(BenchmarkMeasurementTemplate{ .verbose = std::string{"Average Memory Usage", std::allocator{}} }) ) ); const BenchmarkMeasurer matrixMeasurer(matrix, monitor); @@ -53,8 +55,8 @@ namespace executables { auto main() -> int { multithreading::utilities::Application benchmarkApplication( multithreading::utilities::ApplicationInfo{ - .appName="Unbounded Queue Benchmark", - .appVersion="1.0.0", + .appName=std::string{"Unbounded Queue Benchmark", std::allocator{}}, + .appVersion=std::string{"1.0.0", std::allocator{}}, .beforeTask = std::nullopt, .afterTask = std::nullopt } @@ -64,11 +66,11 @@ auto main() -> int { std::make_shared>(LockFreeQueueConfig{ .maxUpdateDepth = 10000 }), - "Lock-free Queue Benchmark" + std::string{"Lock-free Queue Benchmark", std::allocator{}} ), BenchmarkTask>( std::make_shared>(), - "Fine-Grained Lock Queue Benchmark" + std::string{"Fine-Grained Lock Queue Benchmark", std::allocator{}} ) }; diff --git a/multithreading/benchmark/include/BenchmarkMeasurement.h b/multithreading/benchmark/include/BenchmarkMeasurement.h index 34910ac..2b8d9db 100644 --- a/multithreading/benchmark/include/BenchmarkMeasurement.h +++ b/multithreading/benchmark/include/BenchmarkMeasurement.h @@ -3,6 +3,8 @@ #include #include #include +#include + namespace multithreading::benchmark { @@ -65,4 +67,4 @@ namespace multithreading::benchmark { template using RefMeasurementsList = std::tuple>...>; -} // namespace multithreading::benchmark \ No newline at end of file +} // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/AverageMemoryMeasurement.cpp b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp index 2cca06b..f6877f1 100644 --- a/multithreading/benchmark/src/AverageMemoryMeasurement.cpp +++ b/multithreading/benchmark/src/AverageMemoryMeasurement.cpp @@ -15,13 +15,13 @@ namespace multithreading::benchmark { void AverageMemoryMeasurement::start() { is_started = true; snapshots.clear(); - baseline = utilities::performance::MemoryMeasurement::currentMemoryUsage(); + baseline = static_cast(utilities::performance::MemoryMeasurement::currentMemoryUsage()); } void AverageMemoryMeasurement::snapshot() { if (is_started) { - const size_t memory_usage = - utilities::performance::MemoryMeasurement::currentMemoryUsage(); + const auto memory_usage = + static_cast(utilities::performance::MemoryMeasurement::currentMemoryUsage()); const std::chrono::nanoseconds timestamp = std::chrono::high_resolution_clock::now().time_since_epoch(); @@ -53,9 +53,12 @@ namespace multithreading::benchmark { return accumulated + (item.second - baseline); } ); - const double snapshots_average = static_cast(snapshots_summary) / + const auto snapshots_average = static_cast(snapshots_summary) / static_cast(snapshots.size()); - return BenchmarkMeasurementResult(information, snapshots_average, "kB"); + return BenchmarkMeasurementResult( + information, + snapshots_average, + std::string{"kB", std::allocator{}}); } } // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp index 972fbe7..1920094 100644 --- a/multithreading/benchmark/src/PeakMemoryMeasurement.cpp +++ b/multithreading/benchmark/src/PeakMemoryMeasurement.cpp @@ -9,7 +9,7 @@ namespace multithreading::benchmark { PeakMemoryMeasurement::PeakMemoryMeasurement(const BenchmarkMeasurementTemplate& measurement) : BenchmarkMeasurement(measurement) , baseline(0) - , snapshots({}) + , snapshots(std::vector{}, std::allocator{}) , recorded_result(0) {} @@ -45,6 +45,10 @@ namespace multithreading::benchmark { return std::nullopt; } - return BenchmarkMeasurementResult(information, recorded_result, "kB"); + return BenchmarkMeasurementResult( + information, + recorded_result, + std::string{"kB", std::allocator{}} + ); } } // namespace multithreading::benchmark diff --git a/multithreading/benchmark/src/SpeedMeasurement.cpp b/multithreading/benchmark/src/SpeedMeasurement.cpp index 8f30896..024b93e 100644 --- a/multithreading/benchmark/src/SpeedMeasurement.cpp +++ b/multithreading/benchmark/src/SpeedMeasurement.cpp @@ -1,5 +1,6 @@ #include "../include/SpeedMeasurement.h" +#include #include #include @@ -35,14 +36,12 @@ namespace multithreading::benchmark { const auto duration = std::chrono::duration_cast( end_time - start_time ); - const DurationType duration_ms = std::round( - static_cast(duration.count()) / static_cast(utilities::NS_PER_MICS) - ); + const double duration_ms = static_cast(duration.count()) / static_cast(utilities::NS_PER_MICS); return BenchmarkMeasurementResult( information, - duration_ms, - "mics" + std::llround(duration_ms), + std::string{"mics", std::allocator{}} ); } } // namespace multithreading::benchmark From 006e8d35f03ec86ed5bf1d5832b4c7d296feafaf Mon Sep 17 00:00:00 2001 From: isoldpower Date: Mon, 23 Feb 2026 20:14:19 -0600 Subject: [PATCH 19/19] fix: exclude windows from ci target platforms windows is not supported for benchmarking and multithreading as it has some serious limitations on retreiving the memory usage information --- .github/workflows/multi-platform.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/multi-platform.yaml b/.github/workflows/multi-platform.yaml index 1cf5f6d..0b8f177 100644 --- a/.github/workflows/multi-platform.yaml +++ b/.github/workflows/multi-platform.yaml @@ -13,13 +13,10 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] build_type: [Release] c_compiler: [gcc, clang, cl] include: - - os: windows-latest - c_compiler: cl - cpp_compiler: cl - os: ubuntu-latest c_compiler: gcc cpp_compiler: g++ @@ -33,6 +30,8 @@ jobs: c_compiler: clang cpp_compiler: clang++ exclude: + - os: windows-latest + c_compiler: cl - os: windows-latest c_compiler: gcc - os: windows-latest