Skip to content

Commit b404b87

Browse files
committed
Move FindPattern method to source
1 parent d81ef7b commit b404b87

2 files changed

Lines changed: 174 additions & 107 deletions

File tree

include/dynlibutils/module.hpp

Lines changed: 3 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -437,124 +437,20 @@ class CAssemblyModule : public CMemory
437437
// *pModuleSection
438438
// Output : CMemory
439439
//-----------------------------------------------------------------------------
440-
template<std::size_t SIZE = (s_nDefaultPatternSize - 1) / 2>
441-
inline CMemory FindPattern(const CMemoryView<std::uint8_t> pPatternMem, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const
442-
{
443-
const auto* pPattern = pPatternMem.RCastView();
444-
445-
CCache sKey(pPattern, svMask.size(), pStartAddress, pModuleSection);
446-
if (auto pAddr = GetAddress(sKey))
447-
{
448-
return pAddr;
449-
}
450-
451-
const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
452-
453-
if (!pSection || !pSection->IsValid())
454-
return DYNLIB_INVALID_MEMORY;
455-
456-
const std::uintptr_t base = pSection->GetAddr();
457-
const std::size_t sectionSize = pSection->m_nSectionSize;
458-
const std::size_t patternSize = svMask.size();
459-
460-
auto* pData = reinterpret_cast<std::uint8_t*>(base);
461-
const auto* pEnd = pData + sectionSize - patternSize;
462-
463-
if (pStartAddress)
464-
{
465-
auto* start = pStartAddress.RCast<std::uint8_t*>();
466-
if (start < pData || start > pEnd)
467-
return DYNLIB_INVALID_MEMORY;
468-
469-
pData = start;
470-
}
471-
472-
#if !DYNLIBUTILS_ARCH_ARM
473-
std::array<int, 64> masks = {};// 64*16 = enough masks for 1024 bytes.
474-
auto numMasks = static_cast<std::uint8_t>(std::ceil(static_cast<float>(patternSize) / 16.f));
475-
476-
for (std::uint8_t i = 0; i < numMasks; ++i)
477-
{
478-
for (std::int8_t j = static_cast<std::int8_t>(std::min<std::size_t>(patternSize - i * 16, 16)) - 1; j >= 0; --j)
479-
{
480-
if (svMask[static_cast<std::size_t>(i * 16 + j)] == 'x')
481-
{
482-
masks[i] |= 1 << j;
483-
}
484-
}
485-
}
486-
487-
const __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pPattern));
488-
__m128i xmm2, xmm3, msks;
489-
for (; pData != pEnd; _mm_prefetch(reinterpret_cast<const char*>(++pData + 64), _MM_HINT_NTA))
490-
{
491-
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pData));
492-
msks = _mm_cmpeq_epi8(xmm1, xmm2);
493-
if ((_mm_movemask_epi8(msks) & masks[0]) == masks[0])
494-
{
495-
bool found = true;
496-
for (uint8_t i = 1; i < numMasks; ++i)
497-
{
498-
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pData + i * 16)));
499-
xmm3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pPattern + i * 16)));
500-
msks = _mm_cmpeq_epi8(xmm2, xmm3);
501-
if ((_mm_movemask_epi8(msks) & masks[i]) != masks[i])
502-
{
503-
found = false;
504-
break;
505-
}
506-
}
507-
508-
if (found)
509-
{
510-
UniqueLock_t lock(m_mutex);
511-
m_mapCached[std::move(sKey)] = pData;
512-
return pData;
513-
}
514-
}
515-
}
516-
#else
517-
for (; pData != pEnd; ++pData)
518-
{
519-
bool found = false;
520-
521-
for (size_t i = 0; i < maskLen; ++i)
522-
{
523-
if (mask[i] == 'x' || pPattern[i] == *(pData + i))
524-
{
525-
found = true;
526-
}
527-
else
528-
{
529-
found = false;
530-
break;
531-
}
532-
}
533-
534-
if (found)
535-
{
536-
UniqueLock_t lock(m_mutex);
537-
m_mapCached[std::move(sKey)] = pData;
538-
return pData;
539-
}
540-
}
541-
#endif // !DYNLIBUTILS_ARCH_ARM
542-
543-
return DYNLIB_INVALID_MEMORY;
544-
}
440+
// Core masked scan over raw bytes: svMask carries one character per pattern byte,
// where 'x' means the byte must match and any other character is a wildcard.
// The body lives out of line in src/module.cpp.
CMemory FindPattern(const CMemoryView<std::uint8_t> pPatternMem, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const;
545441

546442
template<std::size_t SIZE>
547443
[[nodiscard]]
548444
inline CMemory FindPattern(const Pattern_t<SIZE>& copyPattern, const CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
549445
{
550-
return FindPattern<SIZE>(copyPattern.m_aBytes.data(), std::string_view(copyPattern.m_aMask.data(), copyPattern.m_nSize), pStartAddress, pModuleSection);
446+
return FindPattern(copyPattern.m_aBytes.data(), std::string_view(copyPattern.m_aMask.data(), copyPattern.m_nSize), pStartAddress, pModuleSection);
551447
}
552448

553449
template<std::size_t SIZE>
554450
[[nodiscard]]
555451
inline CMemory FindPattern(Pattern_t<SIZE>&& movePattern, const CMemory pStartAddress = nullptr, const Section_t* pModuleSection = nullptr) const
556452
{
557-
return FindPattern<SIZE>(std::move(movePattern.m_aBytes).data(), std::string_view(std::move(movePattern.m_aMask).data(), std::move(movePattern.m_nSize)), pStartAddress, pModuleSection);
453+
return FindPattern(std::move(movePattern.m_aBytes).data(), std::string_view(std::move(movePattern.m_aMask).data(), std::move(movePattern.m_nSize)), pStartAddress, pModuleSection);
558454
}
559455

560456
template<std::size_t SIZE, PatternCallback_t FUNC>

src/module.cpp

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,177 @@ CMemory CAssemblyModule<Mutex>::GetAddress(const CCache& hKey) const noexcept
7373
return it->second;
7474
}
7575

76+
template<typename Mutex>
77+
CMemory CAssemblyModule<Mutex>::FindPattern(const CMemoryView<std::uint8_t> pPatternMem, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const
78+
{
79+
const auto* pPattern = pPatternMem.RCastView();
80+
81+
CCache sKey(pPattern, svMask.size(), pStartAddress, pModuleSection);
82+
if (auto pAddr = GetAddress(sKey))
83+
{
84+
return pAddr;
85+
}
86+
87+
const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
88+
89+
if (!pSection || !pSection->IsValid())
90+
return DYNLIB_INVALID_MEMORY;
91+
92+
const std::uintptr_t base = pSection->GetAddr();
93+
const std::size_t sectionSize = pSection->m_nSectionSize;
94+
const std::size_t patternSize = svMask.size();
95+
96+
auto* pData = reinterpret_cast<std::uint8_t*>(base);
97+
const auto* pEnd = pData + sectionSize - patternSize;
98+
99+
if (pStartAddress)
100+
{
101+
auto* start = pStartAddress.RCast<std::uint8_t*>();
102+
if (start < pData || start > pEnd)
103+
return DYNLIB_INVALID_MEMORY;
104+
105+
pData = start;
106+
}
107+
108+
auto numMasks = static_cast<std::uint8_t>(std::ceil(static_cast<float>(patternSize) / 16.f));
109+
110+
#if !DYNLIBUTILS_ARCH_ARM
111+
std::array<int, 64> masks = {};// 64*16 = enough masks for 1024 bytes.
112+
113+
for (std::uint8_t i = 0; i < numMasks; ++i)
114+
{
115+
for (std::int8_t j = static_cast<std::int8_t>(std::min<std::size_t>(patternSize - i * 16, 16)) - 1; j >= 0; --j)
116+
{
117+
if (svMask[static_cast<std::size_t>(i * 16 + j)] == 'x')
118+
{
119+
masks[i] |= 1 << j;
120+
}
121+
}
122+
}
123+
124+
const __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pPattern));
125+
__m128i xmm2, xmm3, msks;
126+
for (; pData != pEnd; _mm_prefetch(reinterpret_cast<const char*>(++pData + 64), _MM_HINT_NTA))
127+
{
128+
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pData));
129+
msks = _mm_cmpeq_epi8(xmm1, xmm2);
130+
if ((_mm_movemask_epi8(msks) & masks[0]) == masks[0])
131+
{
132+
bool found = true;
133+
for (uint8_t i = 1; i < numMasks; ++i)
134+
{
135+
xmm2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pData + i * 16)));
136+
xmm3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>((pPattern + i * 16)));
137+
msks = _mm_cmpeq_epi8(xmm2, xmm3);
138+
if ((_mm_movemask_epi8(msks) & masks[i]) != masks[i])
139+
{
140+
found = false;
141+
break;
142+
}
143+
}
144+
145+
if (found)
146+
{
147+
UniqueLock_t lock(m_mutex);
148+
m_mapCached[std::move(sKey)] = pData;
149+
return pData;
150+
}
151+
}
152+
}
153+
#else
154+
// Precompute contiguous 'x' runs for memcmp.
155+
struct SignatureMask_t
156+
{
157+
std::size_t offset;
158+
std::size_t length;
159+
};
160+
161+
SignatureMask_t sigs[(s_nDefaultPatternSize - 1) / 2]; // upper bound is fine; SIZE is already capped upstream
162+
std::size_t numSigs = 0;
163+
164+
{
165+
std::size_t i = 0;
166+
while (i < patternSize)
167+
{
168+
// Skip wildcards
169+
while (i < patternSize && svMask[i] != 'x')
170+
++i;
171+
172+
if (i >= patternSize)
173+
break;
174+
175+
const std::size_t start = i;
176+
while (i < patternSize && svMask[i] == 'x')
177+
++i;
178+
179+
const std::size_t len = i - start;
180+
if (len)
181+
{
182+
if (numSigs < std::size(sigs))
183+
{
184+
sigs[numSigs++] = SignatureMask_t{ start, len };
185+
}
186+
else
187+
{
188+
// Fallback: if too many runs for the static buffer, do a simple byte-wise path later.
189+
numSigs = 0;
190+
break;
191+
}
192+
}
193+
}
194+
}
195+
196+
// If mask has no 'x', first position matches trivially.
197+
if (numSigs == 0 && std::find(svMask.begin(), svMask.end(), 'x') == svMask.end())
198+
{
199+
UniqueLock_t lock(m_mutex);
200+
m_mapCached[std::move(sKey)] = pData;
201+
return pData;
202+
}
203+
204+
// Main scan.
205+
for (; pData <= pEnd; ++pData)
206+
{
207+
bool bFound = true;
208+
209+
if (numSigs)
210+
{
211+
// memcmp only over the strict segments
212+
for (std::size_t r = 0; r < numSigs; ++r)
213+
{
214+
const SignatureMask_t& run = sigs[r];
215+
if (std::memcmp(pData + run.offset, pPattern + run.offset, run.length) != 0)
216+
{
217+
bFound = false;
218+
break;
219+
}
220+
}
221+
}
222+
else
223+
{
224+
// Degenerate path if run buffer overflowed: byte-wise check honoring mask.
225+
for (std::size_t j = 0; j < patternSize; ++j)
226+
{
227+
if (svMask[j] == 'x' && pData[j] != pPattern[j])
228+
{
229+
bFound = false;
230+
break;
231+
}
232+
}
233+
}
234+
235+
if (bFound)
236+
{
237+
UniqueLock_t lock(m_mutex);
238+
m_mapCached[std::move(sKey)] = pData;
239+
return pData;
240+
}
241+
}
242+
#endif // !DYNLIBUTILS_ARCH_ARM
243+
244+
return DYNLIB_INVALID_MEMORY;
245+
}
246+
76247
#ifdef DYNLIBUTILS_SEPARATE_SOURCE_FILES
77248
#if DYNLIBUTILS_PLATFORM_WINDOWS
78249
#include "windows/module.cpp"

0 commit comments

Comments
 (0)