2020#include < type_traits>
2121#include < utility>
2222#include < vector>
23+ #include < variant>
24+ #include < span>
25+ #include < unordered_map>
26+ #include < mutex>
27+ #include < shared_mutex>
2328
2429#ifdef __cpp_concepts
2530# include < concepts>
@@ -92,21 +97,31 @@ concept PatternCallback_t = requires(T func, std::size_t index, CMemory match)
9297#elif defined(_MSC_VER)
9398# pragma warning(error: 4714)
9499# define DYNLIB_FORCE_INLINE [[msvc::forceinline]]
95- # define DYNLIB_NOINLINE __declspec ( noinline)
100+ # define DYNLIB_NOINLINE [[msvc:: noinline]]
96101#else
97102# define DYNLIB_FORCE_INLINE inline
98103# define DYNLIB_NOINLINE
99104#endif
100105
// DYNLIB_NUA: attribute placed in front of a data member so that a member of an empty
// type is allowed to take up no storage of its own.
//   - __has_cpp_attribute(no_unique_address) is non-zero and _MSC_VER >= 1929:
//       expands to [[msvc::no_unique_address]]
//   - __has_cpp_attribute(no_unique_address) is non-zero otherwise:
//       expands to the standard [[no_unique_address]]
//   - __has_cpp_attribute(no_unique_address) is zero:
//       expands to nothing
// NOTE(review): the _MSC_VER branch is nested inside the __has_cpp_attribute check, so the
// msvc:: spelling is only selected when the compiler also reports the standard attribute
// as available — confirm that MSVC does, otherwise that branch is never taken.
106+ #if __has_cpp_attribute(no_unique_address)
107+ #if defined(_MSC_VER) && _MSC_VER >= 1929
108+ # define DYNLIB_NUA [[msvc::no_unique_address]]
109+ #else
110+ # define DYNLIB_NUA [[no_unique_address]]
111+ #endif
112+ #else
113+ # define DYNLIB_NUA
114+ #endif
115+
101116template <std::size_t INDEX = 0 , std::size_t N, std::size_t SIZE = (N - 1 ) / 2 >
102117DYNLIB_FORCE_INLINE DYNLIB_COMPILE_TIME_EXPR void ProcessStringPattern (const char (&szInput)[N], std::size_t& n, std::size_t& nIndex, std::array<std::uint8_t, SIZE>& aBytes, std::array<char, SIZE>& aMask)
103118{
104119 static_assert (SIZE > 0 , " Process pattern cannot be empty" );
105120
106121 constexpr auto funcIsHexDigit = [](char c) -> bool
107122 {
108- return (' 0' <= c && c <= ' 9' ) ||
109- (' A' <= c && c <= ' F' ) ||
123+ return (' 0' <= c && c <= ' 9' ) ||
124+ (' A' <= c && c <= ' F' ) ||
110125 (' a' <= c && c <= ' f' );
111126 };
112127
@@ -127,7 +142,7 @@ DYNLIB_FORCE_INLINE DYNLIB_COMPILE_TIME_EXPR void ProcessStringPattern(const cha
127142 {
128143 const char c = szInput[n];
129144
130- if (c == ' ' )
145+ if (c == ' ' )
131146 {
132147 n++;
133148 ProcessStringPattern<INDEX + 1 >(szInput, n, nIndex, aBytes, aMask);
@@ -259,22 +274,95 @@ inline auto ParsePattern(const std::string_view svInput)
259274 return result;
260275}
261276
262- class CModule : public CMemory
277+ struct CCache
278+ {
279+ std::string m_svPattern;
280+ uintptr_t m_nStart;
281+ uintptr_t m_pSectionAddr;
282+ size_t m_nSectionSize;
283+
284+ CCache (std::string_view svName, uintptr_t nMeta = 0 )
285+ : m_svPattern(svName)
286+ , m_nStart(nMeta)
287+ , m_pSectionAddr(0 )
288+ , m_nSectionSize(0 ) {
289+ }
290+
291+ CCache (
292+ const std::uint8_t * pPatternMem,
293+ const size_t nSize,
294+ const CMemory pStartAddress = nullptr ,
295+ const Section_t* pModuleSection = nullptr
296+ )
297+ : m_svPattern(pPatternMem, pPatternMem + nSize)
298+ , m_nStart(pStartAddress.GetAddr())
299+ , m_pSectionAddr(pModuleSection ? pModuleSection->GetAddr () : 0)
300+ , m_nSectionSize(pModuleSection ? pModuleSection->m_nSectionSize : 0 ) {
301+ }
302+
303+ bool operator ==(const CCache& rhs) const noexcept
304+ {
305+ return m_svPattern == rhs.m_svPattern &&
306+ m_nStart == rhs.m_nStart &&
307+ m_pSectionAddr == rhs.m_pSectionAddr &&
308+ m_nSectionSize == rhs.m_nSectionSize ;
309+ }
310+
311+ bool operator <(const CCache& rhs) const noexcept
312+ {
313+ if (m_svPattern != rhs.m_svPattern )
314+ return m_svPattern < rhs.m_svPattern ;
315+ if (m_nStart != rhs.m_nStart )
316+ return m_nStart < rhs.m_nStart ;
317+ if (m_pSectionAddr != rhs.m_pSectionAddr )
318+ return m_pSectionAddr < rhs.m_pSectionAddr ;
319+ return m_nSectionSize < rhs.m_nSectionSize ;
320+ }
321+ };
322+
323+ struct CHash
263324{
325+ std::size_t operator ()(const CCache& k) const noexcept
326+ {
327+ static constexpr std::size_t golden_ratio = 0x9e3779b9u ;
328+ std::size_t h = std::hash<std::string>()(k.m_svPattern );
329+ h ^= std::hash<uintptr_t >()(k.m_nStart ) + golden_ratio + (h << 6 ) + (h >> 2 );
330+ h ^= std::hash<uintptr_t >()(k.m_pSectionAddr ) + golden_ratio + (h << 6 ) + (h >> 2 );
331+ h ^= std::hash<size_t >()(k.m_nSectionSize ) + golden_ratio + (h << 6 ) + (h >> 2 );
332+ return h;
333+ }
334+ };
335+
// Do-nothing mutex used as the default Mutex argument of CAssemblyModule. It provides the
// member functions that std::unique_lock and std::shared_lock call, but every operation is
// a no-op and every try_* reports success, so it performs no synchronisation at all.
//
// All six operations are noexcept: none of them can fail, and the exclusive trio now
// matches the shared trio, which was already declared noexcept.
struct CNullMutex
{
	// Exclusive-lock interface.
	void lock() const noexcept {}
	void unlock() const noexcept {}
	bool try_lock() const noexcept { return true; }

	// Shared-lock interface.
	void lock_shared() const noexcept {}
	void unlock_shared() const noexcept {}
	bool try_lock_shared() const noexcept { return true; }
};
346+
347+ template <typename Mutex = CNullMutex>
348+ class CAssemblyModule : public CMemory
349+ {
350+ using UniqueLock_t = std::unique_lock<Mutex>;
351+ using SharedLock_t = std::shared_lock<Mutex>;
264352public:
265353 template <std::size_t SIZE>
266354 class CSignatureView : public Pattern_t <SIZE>
267355 {
268356 using Base_t = Pattern_t<SIZE>;
269357
270358 private:
271- CModule * m_pModule;
359+ CAssemblyModule * m_pModule;
272360
273361 public:
274362 constexpr CSignatureView () : m_pModule(nullptr ) {}
275363 constexpr CSignatureView (CSignatureView&& moveFrom) : Base_t(std::move(moveFrom)), m_pModule(std::move(moveFrom.m_pModule)) {}
276- constexpr CSignatureView (const Base_t& pattern, CModule * module ) : Base_t(pattern), m_pModule(module ) {}
277- constexpr CSignatureView (Base_t&& pattern, CModule * module ) : Base_t(std::move(pattern)), m_pModule(module ) {}
364+ constexpr CSignatureView (const Base_t& pattern, CAssemblyModule * module ) : Base_t(pattern), m_pModule(module ) {}
365+ constexpr CSignatureView (Base_t&& pattern, CAssemblyModule * module ) : Base_t(std::move(pattern)), m_pModule(module ) {}
278366
279367 bool IsValid () const { return m_pModule && m_pModule->IsValid (); }
280368
@@ -294,23 +382,30 @@ class CModule : public CMemory
294382 }; // class CSignatureView<SIZE>
295383
296384private:
385+ [[nodiscard]] CMemory GetVirtualTable (const std::string_view svTableName, bool bDecorated = false ) const ;
386+ [[nodiscard]] CMemory GetFunction (const std::string_view svFunctionName) const noexcept ;
387+ CMemory GetAddress (const CCache& hKey) const noexcept ;
388+
297389 std::string m_sPath;
298390 std::string m_sLastError;
299391 std::vector<Section_t> m_vecSections;
300392
301393 const Section_t *m_pExecutableSection;
302394
395+ alignas (std::hardware_constructive_interference_size) mutable std::unordered_map<CCache, CMemory, CHash> m_mapCached;
396+ DYNLIB_NUA mutable Mutex m_mutex;
397+
303398public:
304- CModule () : m_pExecutableSection(nullptr ) {}
305- ~CModule ();
399+ CAssemblyModule () : m_pExecutableSection(nullptr ) {}
400+ ~CAssemblyModule ();
306401
307- CModule (const CModule &) = delete ;
308- CModule & operator =(const CModule &) = delete ;
309- CModule (CModule && other) noexcept : CMemory(std::exchange(static_cast <CMemory &>(other), DYNLIB_INVALID_MEMORY)), m_sPath(std::move(other.m_sPath)), m_vecSections(std::move(other.m_vecSections)), m_pExecutableSection(std::move(other.m_pExecutableSection)) {}
310- CModule (const CMemory pModuleMemory);
311- explicit CModule (const std::string_view svModuleName);
312- explicit CModule (const char * pszModuleName) : CModule (std::string_view(pszModuleName)) {}
313- explicit CModule (const std::string& sModuleName ) : CModule (std::string_view(sModuleName )) {}
402+ CAssemblyModule (const CAssemblyModule &) = delete ;
403+ CAssemblyModule & operator =(const CAssemblyModule &) = delete ;
404+ CAssemblyModule (CAssemblyModule && other) noexcept : CMemory(std::exchange(static_cast <CMemory &>(other), DYNLIB_INVALID_MEMORY)), m_sPath(std::move(other.m_sPath)), m_vecSections(std::move(other.m_vecSections)), m_pExecutableSection(std::move(other.m_pExecutableSection)) {}
405+ CAssemblyModule (const CMemory pModuleMemory);
406+ explicit CAssemblyModule (const std::string_view svModuleName);
407+ explicit CAssemblyModule (const char * pszModuleName) : CAssemblyModule (std::string_view(pszModuleName)) {}
408+ explicit CAssemblyModule (const std::string& sModuleName ) : CAssemblyModule (std::string_view(sModuleName )) {}
314409
315410 bool LoadFromPath (const std::string_view svModelePath, int flags);
316411
@@ -348,6 +443,12 @@ class CModule : public CMemory
348443 {
349444 const auto * pPattern = pPatternMem.RCastView ();
350445
446+ CCache sKey (pPattern, svMask.size (), pStartAddress, pModuleSection);
447+ if (auto pAddr = GetAddress (sKey ))
448+ {
449+ return pAddr;
450+ }
451+
351452 const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
352453
353454 if (!pSection || !pSection->IsValid ())
@@ -394,10 +495,10 @@ class CModule : public CMemory
394495 }
395496
396497 // How far ahead (in bytes) to prefetch during scanning.
397- // This is calculated based on how many SIMD blocks (16 bytes each) will be read
498+ // This is calculated based on how many SIMD blocks (16 bytes each) will be read
398499 // in the current pattern match attempt.
399500 //
400- // Helps reduce cache misses during large linear memory scans by hinting the CPU
501+ // Helps reduce cache misses during large linear memory scans by hinting the CPU
401502 // to load the next block of memory before it is needed.
402503 const std::size_t lookAhead = numBlocks * kSimdBytes ;
403504
@@ -422,7 +523,11 @@ class CModule : public CMemory
422523 }
423524
424525 if (bFound)
526+ {
527+ UniqueLock_t lock (m_mutex);
528+ m_mapCached[std::move (sKey )] = pData;
425529 return pData;
530+ }
426531 }
427532
428533 return DYNLIB_INVALID_MEMORY;
@@ -493,15 +598,20 @@ class CModule : public CMemory
493598
494599protected:
495600 void SaveLastError ();
496- }; // class CModule
601+ }; // class CAssemblyModule
497602
498- class Module final : CModule
603+ using CModule = CAssemblyModule<CNullMutex>;
604+
605+ class Module final : public CModule
499606{
500607public:
501608 using CBase = CModule;
502609 using CBase::CBase;
503610};
504611
612+ extern template class CAssemblyModule <CNullMutex>;
613+ extern template class CAssemblyModule <std::shared_mutex>;
614+
505615} // namespace DynLibUtils
506616
507617#endif // DYNLIBUTILS_MODULE_HPP
0 commit comments