Skip to content

Commit 6637278

Browse files
Diff detection
1 parent 9d1b07e commit 6637278

23 files changed

Lines changed: 77 additions & 3486 deletions

src/cli/diff.c

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -420,6 +420,49 @@ static float* load_stored_embedding(const char* hash, size_t *dims)
420420
}
421421
DEBUG_PRINT("%s", debug_buf);
422422

423+
// Check for NumPy magic string '\x93NUMPY'
424+
if (decompressed_size >= 10 && memcmp(decompressed_data, "\x93NUMPY", 6) == 0) {
425+
DEBUG_PRINT("Detected NumPy array format (.npy) in decompressed data");
426+
427+
// Extract header size from NumPy format (little-endian uint16 at offset 8 in .npy format version 1.0)
428+
uint16_t header_size = *((const uint16_t*)((const uint8_t*)decompressed_data + 8));
429+
430+
// Calculate data offset
431+
size_t data_offset = 10 + header_size;
432+
433+
if (decompressed_size > data_offset) {
434+
// Set dimensions - the shape stored in the .npy header is not parsed here;
435+
// the count is the payload size divided by sizeof(float), i.e. the total number of float elements
436+
float* float_data = (float*)((uint8_t*)decompressed_data + data_offset);
437+
*dims = (decompressed_size - data_offset) / sizeof(float);
438+
439+
DEBUG_PRINT("NumPy array has %zu dimensions/elements", *dims);
440+
441+
// Make a copy of the float data to return
442+
float *data_copy = malloc(*dims * sizeof(float));
443+
if (!data_copy) {
444+
cli_error("Out of memory");
445+
free(decompressed_data);
446+
eb_store_destroy(store);
447+
free(repo_root);
448+
return NULL;
449+
}
450+
451+
memcpy(data_copy, float_data, *dims * sizeof(float));
452+
453+
// Debug first few values
454+
DEBUG_PRINT("First 5 values from numpy array:");
455+
for (size_t i = 0; i < 5 && i < *dims; i++) {
456+
DEBUG_PRINT("[%zu]: %f", i, data_copy[i]);
457+
}
458+
459+
free(decompressed_data);
460+
eb_store_destroy(store);
461+
free(repo_root);
462+
return data_copy;
463+
}
464+
}
465+
423466
// Check specifically for binary format with dimension header
424467
if (decompressed_size >= 4) {
425468
uint32_t dim_header = 0;
@@ -528,6 +571,40 @@ static float* load_stored_embedding(const char* hash, size_t *dims)
528571
&decompressed_data, &decompressed_size) == EB_SUCCESS) {
529572
DEBUG_PRINT("Successfully decompressed data directly: %zu bytes", decompressed_size);
530573

574+
// Check if it's a NumPy file
575+
if (decompressed_size >= 10 && memcmp(decompressed_data, "\x93NUMPY", 6) == 0) {
576+
DEBUG_PRINT("Detected NumPy array format (.npy) in directly decompressed data");
577+
578+
// Extract header size from NumPy format (little-endian uint16 at offset 8 in .npy format version 1.0)
579+
uint16_t header_size = *((const uint16_t*)((const uint8_t*)decompressed_data + 8));
580+
581+
// Calculate data offset
582+
size_t data_offset = 10 + header_size;
583+
584+
if (decompressed_size > data_offset) {
585+
// Set dimensions
586+
float* float_data = (float*)((uint8_t*)decompressed_data + data_offset);
587+
*dims = (decompressed_size - data_offset) / sizeof(float);
588+
589+
DEBUG_PRINT("NumPy array has %zu dimensions/elements", *dims);
590+
591+
// Make a copy of the float data to return
592+
float *data_copy = malloc(*dims * sizeof(float));
593+
if (data_copy) {
594+
memcpy(data_copy, float_data, *dims * sizeof(float));
595+
596+
DEBUG_PRINT("Successfully extracted NumPy array from direct file");
597+
598+
free(decompressed_data);
599+
free(compressed_data);
600+
fclose(f);
601+
eb_store_destroy(store);
602+
free(repo_root);
603+
return data_copy;
604+
}
605+
}
606+
}
607+
531608
// Check if it has the dimension header format
532609
if (decompressed_size >= 4) {
533610
uint32_t dim_header = 0;

src/python/__init__.py

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/python/embedding_bridge/__init__.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/python/embedding_bridge/api.py

Lines changed: 0 additions & 243 deletions
This file was deleted.

0 commit comments

Comments
 (0)