Skip to content

Commit ad4d512

Browse files
committed
fix: add thread safe caching
1 parent ef90b61 commit ad4d512

7 files changed

Lines changed: 263 additions & 64 deletions

File tree

CMakeLists.txt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,30 +46,30 @@ set(COMPILE_DEFINITIONS
4646
-DDYNLIBUTILS_SEPARATE_SOURCE_FILES
4747
)
4848

49-
set(SOURCE_FILES
50-
${SOURCE_DIR}/module.cpp
51-
)
52-
5349
set(INCLUDE_DIRS
5450
${INCLUDE_DIR}
5551
)
5652

5753
if(WINDOWS)
58-
list(APPEND SOURCE_FILES
59-
${SOURCE_DIR}/windows/module.cpp
54+
set(SOURCE_FILES
55+
${SOURCE_DIR}/windows/module.cpp
6056
)
6157
elseif(LINUX)
62-
list(APPEND SOURCE_FILES
63-
${SOURCE_DIR}/linux/module.cpp
58+
set(SOURCE_FILES
59+
${SOURCE_DIR}/linux/module.cpp
6460
)
6561
elseif(MACOS)
66-
list(APPEND SOURCE_FILES
67-
${SOURCE_DIR}/apple/module.cpp
62+
set(SOURCE_FILES
63+
${SOURCE_DIR}/apple/module.cpp
6864
)
6965
else()
7066
message(FATAL_ERROR "Unsupported platform")
7167
endif()
7268

69+
list(APPEND SOURCE_FILES
70+
${SOURCE_DIR}/module.cpp # always include last
71+
)
72+
7373
add_library(${PROJECT_NAME} STATIC ${SOURCE_FILES})
7474

7575
set_target_properties(${PROJECT_NAME} PROPERTIES

cmake/platform/linux.cmake

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,28 @@ set(PLATFORM_COMPILE_OPTIONS
1414

1515
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
1616
set(PLATFORM_COMPILE_OPTIONS
17-
${PLATFORM_COMPILE_OPTIONS}
18-
19-
-g3 -ggdb
17+
${PLATFORM_COMPILE_OPTIONS}
18+
-g3 -gdwarf-4 -fno-omit-frame-pointer -fno-inline
2019
)
20+
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
21+
set(PLATFORM_COMPILE_OPTIONS
22+
${PLATFORM_COMPILE_OPTIONS}
23+
-fstandalone-debug -glldb
24+
)
25+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
26+
set(PLATFORM_COMPILE_OPTIONS
27+
${PLATFORM_COMPILE_OPTIONS}
28+
-ggdb3
29+
-fno-eliminate-unused-debug-types
30+
-femit-class-debug-always
31+
-fvar-tracking
32+
-fvar-tracking-assignments
33+
-grecord-gcc-switches
34+
)
35+
endif()
2136
endif()
2237

38+
2339
set(PLATFORM_COMPILE_DEFINITIONS
2440
${PLATFORM_COMPILE_DEFINITIONS}
2541

include/dynlibutils/module.hpp

Lines changed: 131 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020
#include <type_traits>
2121
#include <utility>
2222
#include <vector>
23+
#include <variant>
24+
#include <span>
25+
#include <unordered_map>
26+
#include <mutex>
27+
#include <shared_mutex>
2328

2429
#ifdef __cpp_concepts
2530
# include <concepts>
@@ -92,21 +97,31 @@ concept PatternCallback_t = requires(T func, std::size_t index, CMemory match)
9297
#elif defined(_MSC_VER)
9398
# pragma warning(error: 4714)
9499
# define DYNLIB_FORCE_INLINE [[msvc::forceinline]]
95-
# define DYNLIB_NOINLINE __declspec(noinline)
100+
# define DYNLIB_NOINLINE [[msvc::noinline]]
96101
#else
97102
# define DYNLIB_FORCE_INLINE inline
98103
# define DYNLIB_NOINLINE
99104
#endif
100105

106+
#if __has_cpp_attribute(no_unique_address)
107+
#if defined(_MSC_VER) && _MSC_VER >= 1929
108+
# define DYNLIB_NUA [[msvc::no_unique_address]]
109+
#else
110+
# define DYNLIB_NUA [[no_unique_address]]
111+
#endif
112+
#else
113+
# define DYNLIB_NUA
114+
#endif
115+
101116
template<std::size_t INDEX = 0, std::size_t N, std::size_t SIZE = (N - 1) / 2>
102117
DYNLIB_FORCE_INLINE DYNLIB_COMPILE_TIME_EXPR void ProcessStringPattern(const char (&szInput)[N], std::size_t& n, std::size_t& nIndex, std::array<std::uint8_t, SIZE>& aBytes, std::array<char, SIZE>& aMask)
103118
{
104119
static_assert(SIZE > 0, "Process pattern cannot be empty");
105120

106121
constexpr auto funcIsHexDigit = [](char c) -> bool
107122
{
108-
return ('0' <= c && c <= '9') ||
109-
('A' <= c && c <= 'F') ||
123+
return ('0' <= c && c <= '9') ||
124+
('A' <= c && c <= 'F') ||
110125
('a' <= c && c <= 'f');
111126
};
112127

@@ -127,7 +142,7 @@ DYNLIB_FORCE_INLINE DYNLIB_COMPILE_TIME_EXPR void ProcessStringPattern(const cha
127142
{
128143
const char c = szInput[n];
129144

130-
if (c == ' ')
145+
if (c == ' ')
131146
{
132147
n++;
133148
ProcessStringPattern<INDEX + 1>(szInput, n, nIndex, aBytes, aMask);
@@ -259,22 +274,95 @@ inline auto ParsePattern(const std::string_view svInput)
259274
return result;
260275
}
261276

262-
class CModule : public CMemory
277+
struct CCache
278+
{
279+
std::string m_svPattern;
280+
uintptr_t m_nStart;
281+
uintptr_t m_pSectionAddr;
282+
size_t m_nSectionSize;
283+
284+
CCache(std::string_view svName, uintptr_t nMeta = 0)
285+
: m_svPattern(svName)
286+
, m_nStart(nMeta)
287+
, m_pSectionAddr(0)
288+
, m_nSectionSize(0) {
289+
}
290+
291+
CCache(
292+
const std::uint8_t* pPatternMem,
293+
const size_t nSize,
294+
const CMemory pStartAddress = nullptr,
295+
const Section_t* pModuleSection = nullptr
296+
)
297+
: m_svPattern(pPatternMem, pPatternMem + nSize)
298+
, m_nStart(pStartAddress.GetAddr())
299+
, m_pSectionAddr(pModuleSection ? pModuleSection->GetAddr() : 0)
300+
, m_nSectionSize(pModuleSection ? pModuleSection->m_nSectionSize : 0) {
301+
}
302+
303+
bool operator==(const CCache& rhs) const noexcept
304+
{
305+
return m_svPattern == rhs.m_svPattern &&
306+
m_nStart == rhs.m_nStart &&
307+
m_pSectionAddr == rhs.m_pSectionAddr &&
308+
m_nSectionSize == rhs.m_nSectionSize;
309+
}
310+
311+
bool operator<(const CCache& rhs) const noexcept
312+
{
313+
if (m_svPattern != rhs.m_svPattern)
314+
return m_svPattern < rhs.m_svPattern;
315+
if (m_nStart != rhs.m_nStart)
316+
return m_nStart < rhs.m_nStart;
317+
if (m_pSectionAddr != rhs.m_pSectionAddr)
318+
return m_pSectionAddr < rhs.m_pSectionAddr;
319+
return m_nSectionSize < rhs.m_nSectionSize;
320+
}
321+
};
322+
323+
struct CHash
263324
{
325+
std::size_t operator()(const CCache& k) const noexcept
326+
{
327+
static constexpr std::size_t golden_ratio = 0x9e3779b9u;
328+
std::size_t h = std::hash<std::string>()(k.m_svPattern);
329+
h ^= std::hash<uintptr_t>()(k.m_nStart) + golden_ratio + (h << 6) + (h >> 2);
330+
h ^= std::hash<uintptr_t>()(k.m_pSectionAddr) + golden_ratio + (h << 6) + (h >> 2);
331+
h ^= std::hash<size_t>()(k.m_nSectionSize) + golden_ratio + (h << 6) + (h >> 2);
332+
return h;
333+
}
334+
};
335+
336+
/// Mutex look-alike whose operations do nothing.
///
/// Exposes the exclusive (lock / unlock / try_lock) and shared
/// (lock_shared / unlock_shared / try_lock_shared) member functions so it can
/// be plugged into std::unique_lock and std::shared_lock. Every call returns
/// immediately and the try_* forms always report success. All members are
/// const and noexcept, so a const instance can be "locked" as well.
struct CNullMutex
{
	// Exclusive-ownership interface.
	void lock() const noexcept {}
	void unlock() const noexcept {}
	bool try_lock() const noexcept { return true; }

	// Shared-ownership interface.
	void lock_shared() const noexcept {}
	void unlock_shared() const noexcept {}
	bool try_lock_shared() const noexcept { return true; }
};
346+
347+
template<typename Mutex = CNullMutex>
348+
class CAssemblyModule : public CMemory
349+
{
350+
using UniqueLock_t = std::unique_lock<Mutex>;
351+
using SharedLock_t = std::shared_lock<Mutex>;
264352
public:
265353
template<std::size_t SIZE>
266354
class CSignatureView : public Pattern_t<SIZE>
267355
{
268356
using Base_t = Pattern_t<SIZE>;
269357

270358
private:
271-
CModule* m_pModule;
359+
CAssemblyModule* m_pModule;
272360

273361
public:
274362
constexpr CSignatureView() : m_pModule(nullptr) {}
275363
constexpr CSignatureView(CSignatureView&& moveFrom) : Base_t(std::move(moveFrom)), m_pModule(std::move(moveFrom.m_pModule)) {}
276-
constexpr CSignatureView(const Base_t& pattern, CModule* module) : Base_t(pattern), m_pModule(module) {}
277-
constexpr CSignatureView(Base_t&& pattern, CModule* module) : Base_t(std::move(pattern)), m_pModule(module) {}
364+
constexpr CSignatureView(const Base_t& pattern, CAssemblyModule* module) : Base_t(pattern), m_pModule(module) {}
365+
constexpr CSignatureView(Base_t&& pattern, CAssemblyModule* module) : Base_t(std::move(pattern)), m_pModule(module) {}
278366

279367
bool IsValid() const { return m_pModule && m_pModule->IsValid(); }
280368

@@ -294,23 +382,30 @@ class CModule : public CMemory
294382
}; // class CSignatureView<SIZE>
295383

296384
private:
385+
[[nodiscard]] CMemory GetVirtualTable(const std::string_view svTableName, bool bDecorated = false) const;
386+
[[nodiscard]] CMemory GetFunction(const std::string_view svFunctionName) const noexcept;
387+
CMemory GetAddress(const CCache& hKey) const noexcept;
388+
297389
std::string m_sPath;
298390
std::string m_sLastError;
299391
std::vector<Section_t> m_vecSections;
300392

301393
const Section_t *m_pExecutableSection;
302394

395+
alignas(std::hardware_constructive_interference_size) mutable std::unordered_map<CCache, CMemory, CHash> m_mapCached;
396+
DYNLIB_NUA mutable Mutex m_mutex;
397+
303398
public:
304-
CModule() : m_pExecutableSection(nullptr) {}
305-
~CModule();
399+
CAssemblyModule() : m_pExecutableSection(nullptr) {}
400+
~CAssemblyModule();
306401

307-
CModule(const CModule&) = delete;
308-
CModule& operator=(const CModule&) = delete;
309-
CModule(CModule&& other) noexcept : CMemory(std::exchange(static_cast<CMemory &>(other), DYNLIB_INVALID_MEMORY)), m_sPath(std::move(other.m_sPath)), m_vecSections(std::move(other.m_vecSections)), m_pExecutableSection(std::move(other.m_pExecutableSection)) {}
310-
CModule(const CMemory pModuleMemory);
311-
explicit CModule(const std::string_view svModuleName);
312-
explicit CModule(const char* pszModuleName) : CModule(std::string_view(pszModuleName)) {}
313-
explicit CModule(const std::string& sModuleName) : CModule(std::string_view(sModuleName)) {}
402+
CAssemblyModule(const CAssemblyModule&) = delete;
403+
CAssemblyModule& operator=(const CAssemblyModule&) = delete;
404+
CAssemblyModule(CAssemblyModule&& other) noexcept : CMemory(std::exchange(static_cast<CMemory &>(other), DYNLIB_INVALID_MEMORY)), m_sPath(std::move(other.m_sPath)), m_vecSections(std::move(other.m_vecSections)), m_pExecutableSection(std::move(other.m_pExecutableSection)) {}
405+
CAssemblyModule(const CMemory pModuleMemory);
406+
explicit CAssemblyModule(const std::string_view svModuleName);
407+
explicit CAssemblyModule(const char* pszModuleName) : CAssemblyModule(std::string_view(pszModuleName)) {}
408+
explicit CAssemblyModule(const std::string& sModuleName) : CAssemblyModule(std::string_view(sModuleName)) {}
314409

315410
bool LoadFromPath(const std::string_view svModelePath, int flags);
316411

@@ -348,6 +443,12 @@ class CModule : public CMemory
348443
{
349444
const auto* pPattern = pPatternMem.RCastView();
350445

446+
CCache sKey(pPattern, svMask.size(), pStartAddress, pModuleSection);
447+
if (auto pAddr = GetAddress(sKey))
448+
{
449+
return pAddr;
450+
}
451+
351452
const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
352453

353454
if (!pSection || !pSection->IsValid())
@@ -394,10 +495,10 @@ class CModule : public CMemory
394495
}
395496

396497
// How far ahead (in bytes) to prefetch during scanning.
397-
// This is calculated based on how many SIMD blocks (16 bytes each) will be read
498+
// This is calculated based on how many SIMD blocks (16 bytes each) will be read
398499
// in the current pattern match attempt.
399500
//
400-
// Helps reduce cache misses during large linear memory scans by hinting the CPU
501+
// Helps reduce cache misses during large linear memory scans by hinting the CPU
401502
// to load the next block of memory before it is needed.
402503
const std::size_t lookAhead = numBlocks * kSimdBytes;
403504

@@ -422,7 +523,11 @@ class CModule : public CMemory
422523
}
423524

424525
if (bFound)
526+
{
527+
UniqueLock_t lock(m_mutex);
528+
m_mapCached[std::move(sKey)] = pData;
425529
return pData;
530+
}
426531
}
427532

428533
return DYNLIB_INVALID_MEMORY;
@@ -493,15 +598,20 @@ class CModule : public CMemory
493598

494599
protected:
495600
void SaveLastError();
496-
}; // class CModule
601+
}; // class CAssemblyModule
497602

498-
class Module final : CModule
603+
using CModule = CAssemblyModule<CNullMutex>;
604+
605+
/// Final class that publicly derives from CModule and inherits all of its
/// constructors; it declares no members of its own.
class Module final : public CModule
499606
{
500607
public:
501608
using CBase = CModule; // Alias for the base class type.
502609
using CBase::CBase; // Inherit every constructor of the base class.
503610
};
504611

612+
extern template class CAssemblyModule<CNullMutex>;
613+
extern template class CAssemblyModule<std::shared_mutex>;
614+
505615
} // namespace DynLibUtils
506616

507617
#endif // DYNLIBUTILS_MODULE_HPP

0 commit comments

Comments
 (0)