@@ -73,6 +73,177 @@ CMemory CAssemblyModule<Mutex>::GetAddress(const CCache& hKey) const noexcept
7373 return it->second ;
7474}
7575
76+ template <typename Mutex>
77+ CMemory CAssemblyModule<Mutex>::FindPattern(const CMemoryView<std::uint8_t > pPatternMem, const std::string_view svMask, const CMemory pStartAddress, const Section_t* pModuleSection) const
78+ {
79+ const auto * pPattern = pPatternMem.RCastView ();
80+
81+ CCache sKey (pPattern, svMask.size (), pStartAddress, pModuleSection);
82+ if (auto pAddr = GetAddress (sKey ))
83+ {
84+ return pAddr;
85+ }
86+
87+ const Section_t* pSection = pModuleSection ? pModuleSection : m_pExecutableSection;
88+
89+ if (!pSection || !pSection->IsValid ())
90+ return DYNLIB_INVALID_MEMORY;
91+
92+ const std::uintptr_t base = pSection->GetAddr ();
93+ const std::size_t sectionSize = pSection->m_nSectionSize ;
94+ const std::size_t patternSize = svMask.size ();
95+
96+ auto * pData = reinterpret_cast <std::uint8_t *>(base);
97+ const auto * pEnd = pData + sectionSize - patternSize;
98+
99+ if (pStartAddress)
100+ {
101+ auto * start = pStartAddress.RCast <std::uint8_t *>();
102+ if (start < pData || start > pEnd)
103+ return DYNLIB_INVALID_MEMORY;
104+
105+ pData = start;
106+ }
107+
108+ auto numMasks = static_cast <std::uint8_t >(std::ceil (static_cast <float >(patternSize) / 16 .f ));
109+
110+ #if !DYNLIBUTILS_ARCH_ARM
111+ std::array<int , 64 > masks = {};// 64*16 = enough masks for 1024 bytes.
112+
113+ for (std::uint8_t i = 0 ; i < numMasks; ++i)
114+ {
115+ for (std::int8_t j = static_cast <std::int8_t >(std::min<std::size_t >(patternSize - i * 16 , 16 )) - 1 ; j >= 0 ; --j)
116+ {
117+ if (svMask[static_cast <std::size_t >(i * 16 + j)] == ' x' )
118+ {
119+ masks[i] |= 1 << j;
120+ }
121+ }
122+ }
123+
124+ const __m128i xmm1 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(pPattern));
125+ __m128i xmm2, xmm3, msks;
126+ for (; pData != pEnd; _mm_prefetch (reinterpret_cast <const char *>(++pData + 64 ), _MM_HINT_NTA))
127+ {
128+ xmm2 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(pData));
129+ msks = _mm_cmpeq_epi8 (xmm1, xmm2);
130+ if ((_mm_movemask_epi8 (msks) & masks[0 ]) == masks[0 ])
131+ {
132+ bool found = true ;
133+ for (uint8_t i = 1 ; i < numMasks; ++i)
134+ {
135+ xmm2 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>((pData + i * 16 )));
136+ xmm3 = _mm_loadu_si128 (reinterpret_cast <const __m128i*>((pPattern + i * 16 )));
137+ msks = _mm_cmpeq_epi8 (xmm2, xmm3);
138+ if ((_mm_movemask_epi8 (msks) & masks[i]) != masks[i])
139+ {
140+ found = false ;
141+ break ;
142+ }
143+ }
144+
145+ if (found)
146+ {
147+ UniqueLock_t lock (m_mutex);
148+ m_mapCached[std::move (sKey )] = pData;
149+ return pData;
150+ }
151+ }
152+ }
153+ #else
154+ // Precompute contiguous 'x' runs for memcmp.
155+ struct SignatureMask_t
156+ {
157+ std::size_t offset;
158+ std::size_t length;
159+ };
160+
161+ SignatureMask_t sigs[(s_nDefaultPatternSize - 1 ) / 2 ]; // upper bound is fine; SIZE is already capped upstream
162+ std::size_t numSigs = 0 ;
163+
164+ {
165+ std::size_t i = 0 ;
166+ while (i < patternSize)
167+ {
168+ // Skip wildcards
169+ while (i < patternSize && svMask[i] != ' x' )
170+ ++i;
171+
172+ if (i >= patternSize)
173+ break ;
174+
175+ const std::size_t start = i;
176+ while (i < patternSize && svMask[i] == ' x' )
177+ ++i;
178+
179+ const std::size_t len = i - start;
180+ if (len)
181+ {
182+ if (numSigs < std::size (sigs))
183+ {
184+ sigs[numSigs++] = SignatureMask_t{ start, len };
185+ }
186+ else
187+ {
188+ // Fallback: if too many runs for the static buffer, do a simple byte-wise path later.
189+ numSigs = 0 ;
190+ break ;
191+ }
192+ }
193+ }
194+ }
195+
196+ // If mask has no 'x', first position matches trivially.
197+ if (numSigs == 0 && std::find (svMask.begin (), svMask.end (), ' x' ) == svMask.end ())
198+ {
199+ UniqueLock_t lock (m_mutex);
200+ m_mapCached[std::move (sKey )] = pData;
201+ return pData;
202+ }
203+
204+ // Main scan.
205+ for (; pData <= pEnd; ++pData)
206+ {
207+ bool bFound = true ;
208+
209+ if (numSigs)
210+ {
211+ // memcmp only over the strict segments
212+ for (std::size_t r = 0 ; r < numSigs; ++r)
213+ {
214+ const SignatureMask_t& run = sigs[r];
215+ if (std::memcmp (pData + run.offset , pPattern + run.offset , run.length ) != 0 )
216+ {
217+ bFound = false ;
218+ break ;
219+ }
220+ }
221+ }
222+ else
223+ {
224+ // Degenerate path if run buffer overflowed: byte-wise check honoring mask.
225+ for (std::size_t j = 0 ; j < patternSize; ++j)
226+ {
227+ if (svMask[j] == ' x' && pData[j] != pPattern[j])
228+ {
229+ bFound = false ;
230+ break ;
231+ }
232+ }
233+ }
234+
235+ if (bFound)
236+ {
237+ UniqueLock_t lock (m_mutex);
238+ m_mapCached[std::move (sKey )] = pData;
239+ return pData;
240+ }
241+ }
242+ #endif // !DYNLIBUTILS_ARCH_ARM
243+
244+ return DYNLIB_INVALID_MEMORY;
245+ }
246+
76247#ifdef DYNLIBUTILS_SEPARATE_SOURCE_FILES
77248 #if DYNLIBUTILS_PLATFORM_WINDOWS
78249 #include " windows/module.cpp"
0 commit comments