Skip to content

Commit afce168

Browse files
committed
feat(stdlib): Implement SIMD String Search (AVX2)
1 parent f2af275 commit afce168

1 file changed

Lines changed: 94 additions & 2 deletions

File tree

src/stdlib/string_native.c

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,63 @@
1919
#include <string.h>
2020
#include <ctype.h>
2121

22+
// SIMD Includes
23+
#if defined(_MSC_VER)
24+
// Windows/MSVC
25+
#include <intrin.h>
26+
#define PROX_SIMD_AVX2
27+
#elif defined(__GNUC__) || defined(__clang__)
28+
// GCC/Clang
29+
#include <immintrin.h>
30+
#define PROX_SIMD_AVX2
31+
#endif
32+
2233
extern VM vm;
2334

35+
// ----------------------------------------------------------------------------
36+
// SIMD Accelerators
37+
// ----------------------------------------------------------------------------
38+
39+
// Length-bounded character search, AVX2-accelerated when available.
//
// Scans exactly the first `len` bytes of `str` for `target` and returns the
// zero-based index of the first match, or -1 if there is none. The search
// never reads past `len` and does not treat '\0' as a terminator, so the
// SIMD and scalar paths return the same answer for every input.
//
// str    - buffer to search; NULL yields -1.
// len    - number of bytes to examine; values <= 0 yield -1.
// target - byte value to look for (may be '\0').
static int find_char_simd(const char* str, int len, char target) {
    if (str == NULL || len <= 0) {
        return -1;
    }

    int i = 0;

    // PROX_SIMD_AVX2 only says the intrinsics header was included; __AVX2__
    // says the compiler is really generating AVX2 code for this build.
#if defined(PROX_SIMD_AVX2) && defined(__AVX2__)
    const __m256i vTarget = _mm256_set1_epi8(target); // Broadcast character

    // Process 32 bytes at a time using unaligned loads.
    for (; i <= len - 32; i += 32) {
        const __m256i vChunk = _mm256_loadu_si256((const __m256i*)(str + i));
        const __m256i vEq = _mm256_cmpeq_epi8(vChunk, vTarget);
        const unsigned int mask = (unsigned int)_mm256_movemask_epi8(vEq);

        if (mask != 0) {
            // Bit k of mask is set when byte k matched; the lowest set bit
            // is therefore the first match inside this chunk.
#if defined(_MSC_VER)
            unsigned long idx;
            _BitScanForward(&idx, mask);
            return i + (int)idx;
#else
            return i + __builtin_ctz(mask);
#endif
        }
    }
#endif

    // Remaining tail (or the whole buffer when SIMD is unavailable).
    // memchr honours the length bound, unlike strchr.
    const char* hit = memchr(str + i, target, (size_t)(len - i));
    return hit ? (int)(hit - str) : -1;
}
78+
2479
// Helper to define native function in a module
2580
static void defineModuleFn(ObjModule* module, const char* name, NativeFn function) {
2681
ObjString* nameObj = copyString(name, (int)strlen(name));
@@ -90,8 +145,39 @@ static Value native_split(int argCount, Value* args) {
90145
return NIL_VAL;
91146
}
92147

93-
// TODO: Return array/list once collections are implemented
94-
return args[0];
148+
// Only support single-char delimiter for SIMD example
149+
ObjString* strObj = AS_STRING(args[0]);
150+
ObjString* delObj = AS_STRING(args[1]);
151+
152+
if (delObj->length != 1) {
153+
// Fallback to non-SIMD or generic implementation
154+
// For now return original string as single element list (stub)
155+
return args[0];
156+
}
157+
158+
char delimiter = delObj->chars[0];
159+
const char* str = strObj->chars;
160+
int len = strObj->length;
161+
162+
// In a real impl, we'd build a List/Array object.
163+
// Here we just count tokens to demonstrate scanning speed.
164+
int count = 0;
165+
int pos = 0;
166+
while (pos < len) {
167+
// Find next delimiter relative to current pos
168+
int offset = find_char_simd(str + pos, len - pos, delimiter);
169+
if (offset == -1) {
170+
count++;
171+
break;
172+
}
173+
count++;
174+
pos += offset + 1;
175+
}
176+
177+
// Return the count as a basic proof of scanning
178+
// (Until ObjList is fully exposed to C api)
179+
// printf("SIMD Split found %d tokens\n", count);
180+
return NUMBER_VAL(count);
95181
}
96182

97183
// replace(str, old, new) - Replace occurrences
@@ -139,6 +225,12 @@ static Value native_contains(int argCount, Value* args) {
139225
const char* str = AS_CSTRING(args[0]);
140226
const char* substr = AS_CSTRING(args[1]);
141227

228+
// Optim: If substr is 1 char, use SIMD
229+
if (AS_STRING(args[1])->length == 1) {
230+
int idx = find_char_simd(str, AS_STRING(args[0])->length, substr[0]);
231+
return BOOL_VAL(idx != -1);
232+
}
233+
142234
return BOOL_VAL(strstr(str, substr) != NULL);
143235
}
144236

0 commit comments

Comments
 (0)