@@ -50,10 +50,17 @@ static int detect_arch(const char *prefix, char *ret, CUresult *err);
5050static gpudata * new_gpudata (cuda_context * ctx , CUdeviceptr ptr , size_t size );
5151
5252typedef struct _kernel_key {
53+ uint8_t version ;
54+ uint8_t debug ;
55+ uint8_t major ;
56+ uint8_t minor ;
57+ uint32_t reserved ;
5358 char bin_id [64 ];
5459 strb src ;
5560} kernel_key ;
5661
62+ #define KERNEL_KEY_MM (sizeof(kernel_key) - sizeof(strb))
63+
5764static void key_free (cache_key_t _k ) {
5865 kernel_key * k = (kernel_key * )_k ;
5966 strb_clear (& k -> src );
@@ -70,36 +77,36 @@ static uint32_t strb_hash(strb *k) {
7077}
7178
7279static int key_eq (kernel_key * k1 , kernel_key * k2 ) {
73- return (memcmp (k1 -> bin_id , k2 -> bin_id , 64 ) == 0 &&
80+ return (memcmp (k1 , k2 , KERNEL_KEY_MM ) == 0 &&
7481 strb_eq (& k1 -> src , & k2 -> src ));
7582}
7683
7784static int key_hash (kernel_key * k ) {
7885 XXH32_state_t state ;
7986 XXH32_reset (& state , 42 );
80- XXH32_update (& state , k -> bin_id , 64 );
87+ XXH32_update (& state , k , KERNEL_KEY_MM );
8188 XXH32_update (& state , k -> src .s , k -> src .l );
8289 return XXH32_digest (& state );
8390}
8491
8592static int key_write (strb * res , kernel_key * k ) {
86- strb_appendn (res , k -> bin_id , 64 );
93+ strb_appendn (res , ( const char * ) k , KERNEL_KEY_MM );
8794 strb_appendb (res , & k -> src );
8895 return strb_error (res );
8996}
9097
9198static kernel_key * key_read (const strb * b ) {
9299 kernel_key * k ;
93- if (b -> l < 64 ) return NULL ;
100+ if (b -> l < KERNEL_KEY_MM ) return NULL ;
94101 k = calloc (1 , sizeof (* k ));
95102 if (k == NULL ) return NULL ;
96- if (strb_ensure (& k -> src , b -> l - 64 ) != 0 ) {
103+ if (strb_ensure (& k -> src , b -> l - KERNEL_KEY_MM ) != 0 ) {
97104 strb_clear (& k -> src );
98105 free (k );
99106 return NULL ;
100107 }
101- memcpy (k -> bin_id , b -> s , 64 );
102- strb_appendn (& k -> src , b -> s + 64 , b -> l - 64 );
108+ memcpy (k -> bin_id , b -> s , KERNEL_KEY_MM );
109+ strb_appendn (& k -> src , b -> s + KERNEL_KEY_MM , b -> l - KERNEL_KEY_MM );
103110 return k ;
104111}
105112
@@ -1108,6 +1115,13 @@ static int compile(cuda_context *ctx, strb *src, strb* bin, strb *log) {
11081115 kernel_key * pk ;
11091116 int err ;
11101117
1118+ memset (& k , 0 , sizeof (k ));
1119+ k .version = 0 ;
1120+ #ifdef DEBUG
1121+ k .debug = 1 ;
1122+ #endif
1123+ k .major = ctx -> major ;
1124+ k .minor = ctx -> minor ;
11111125 memcpy (k .bin_id , ctx -> bin_id , 64 );
11121126 memcpy (& k .src , src , sizeof (strb ));
11131127
@@ -1131,7 +1145,7 @@ static int compile(cuda_context *ctx, strb *src, strb* bin, strb *log) {
11311145 fprintf (stderr , "Error adding kernel to disk cache\n" );
11321146 return GA_NO_ERROR ;
11331147 }
1134- memcpy (pk -> bin_id , k . bin_id , 64 );
1148+ memcpy (pk , & k , KERNEL_KEY_MM );
11351149 strb_appendb (& pk -> src , src );
11361150 if (strb_error (& pk -> src )) {
11371151 // TODO use better error messages
0 commit comments