@@ -420,6 +420,49 @@ static float* load_stored_embedding(const char* hash, size_t *dims)
420420 }
421421 DEBUG_PRINT ("%s" , debug_buf );
422422
423+ // Check for NumPy magic string '\x93NUMPY'
424+ if (decompressed_size >= 10 && memcmp (decompressed_data , "\x93NUMPY" , 6 ) == 0 ) {
425+ DEBUG_PRINT ("Detected NumPy array format (.npy) in decompressed data" );
426+
427+ // Extract header size from NumPy format (stored at offset 8 as uint16)
428+ uint16_t header_size = * ((const uint16_t * )((const uint8_t * )decompressed_data + 8 ));
429+
430+ // Calculate data offset
431+ size_t data_offset = 10 + header_size ;
432+
433+ if (decompressed_size > data_offset ) {
434+ // Set dimensions: the element count is derived from the payload size, i.e. the
435+ // bytes after the header divided by sizeof(float); the header's shape is not parsed
436+ float * float_data = (float * )((uint8_t * )decompressed_data + data_offset );
437+ * dims = (decompressed_size - data_offset ) / sizeof (float );
438+
439+ DEBUG_PRINT ("NumPy array has %zu dimensions/elements" , * dims );
440+
441+ // Make a copy of the float data to return
442+ float * data_copy = malloc (* dims * sizeof (float ));
443+ if (!data_copy ) {
444+ cli_error ("Out of memory" );
445+ free (decompressed_data );
446+ eb_store_destroy (store );
447+ free (repo_root );
448+ return NULL ;
449+ }
450+
451+ memcpy (data_copy , float_data , * dims * sizeof (float ));
452+
453+ // Debug first few values
454+ DEBUG_PRINT ("First 5 values from numpy array:" );
455+ for (size_t i = 0 ; i < 5 && i < * dims ; i ++ ) {
456+ DEBUG_PRINT ("[%zu]: %f" , i , data_copy [i ]);
457+ }
458+
459+ free (decompressed_data );
460+ eb_store_destroy (store );
461+ free (repo_root );
462+ return data_copy ;
463+ }
464+ }
465+
423466 // Check specifically for binary format with dimension header
424467 if (decompressed_size >= 4 ) {
425468 uint32_t dim_header = 0 ;
@@ -528,6 +571,40 @@ static float* load_stored_embedding(const char* hash, size_t *dims)
528571 & decompressed_data , & decompressed_size ) == EB_SUCCESS ) {
529572 DEBUG_PRINT ("Successfully decompressed data directly: %zu bytes" , decompressed_size );
530573
574+ // Check if it's a NumPy file
575+ if (decompressed_size >= 10 && memcmp (decompressed_data , "\x93NUMPY" , 6 ) == 0 ) {
576+ DEBUG_PRINT ("Detected NumPy array format (.npy) in directly decompressed data" );
577+
578+ // Extract header size from NumPy format (stored at offset 8 as uint16)
579+ uint16_t header_size = * ((const uint16_t * )((const uint8_t * )decompressed_data + 8 ));
580+
581+ // Calculate data offset
582+ size_t data_offset = 10 + header_size ;
583+
584+ if (decompressed_size > data_offset ) {
585+ // Set dimensions
586+ float * float_data = (float * )((uint8_t * )decompressed_data + data_offset );
587+ * dims = (decompressed_size - data_offset ) / sizeof (float );
588+
589+ DEBUG_PRINT ("NumPy array has %zu dimensions/elements" , * dims );
590+
591+ // Make a copy of the float data to return
592+ float * data_copy = malloc (* dims * sizeof (float ));
593+ if (data_copy ) {
594+ memcpy (data_copy , float_data , * dims * sizeof (float ));
595+
596+ DEBUG_PRINT ("Successfully extracted NumPy array from direct file" );
597+
598+ free (decompressed_data );
599+ free (compressed_data );
600+ fclose (f );
601+ eb_store_destroy (store );
602+ free (repo_root );
603+ return data_copy ;
604+ }
605+ }
606+ }
607+
531608 // Check if it has the dimension header format
532609 if (decompressed_size >= 4 ) {
533610 uint32_t dim_header = 0 ;
0 commit comments