@@ -342,104 +342,100 @@ def rename_function(self, bv: BinaryView, selected_result: Dict) -> List[Dict]:
342342
def _process_data_type_batch(self, chunk: List[Dict]) -> List[Dict]:
    """Request data types for one chunk of matches and build their signatures.

    Triggers data-type generation for every distinct ``nearest_neighbor_id``
    in ``chunk``, polls until no item is reported as ``"pending"`` (or the
    attempt budget is exhausted), then converts each ``"completed"`` item
    into a signature via ``make_signature``.

    Args:
        chunk: Match results; each entry must carry ``nearest_neighbor_id``.

    Returns:
        A list of ``{"nearest_neighbor_id": ..., "signature": ...}`` dicts,
        one per completed item whose signature is not ``"N/A"``. Returns an
        empty list if the chunk is empty or any unexpected error occurs.
    """
    # Guard against polling forever when the service never leaves "pending".
    # 100 attempts * 3 s is roughly five minutes per chunk.
    poll_interval_seconds = 3
    max_poll_attempts = 100

    try:
        log_info(f"RevEng.AI | Processing chunk of {len(chunk)} functions")

        requested_ids = {result['nearest_neighbor_id'] for result in chunk}
        if not requested_ids:
            return []
        function_ids = list(requested_ids)

        RE_functions_data_types(function_ids=function_ids)

        items = []
        for _ in range(max_poll_attempts):
            response = RE_functions_data_types_poll(
                function_ids=function_ids,
            ).json()
            data = response.get("data") or {}
            items = data.get("items") or []

            pending_count = sum(1 for item in items if item.get("status") == "pending")
            if not pending_count:
                break
            log_info(f"RevEng.AI | {pending_count} items still pending... trying again")
            time.sleep(poll_interval_seconds)
        else:
            # Budget exhausted: fall through and keep whatever already completed.
            log_error(
                f"RevEng.AI | Polling timed out after {max_poll_attempts} attempts; "
                "using completed items only"
            )

        signatures = []
        for item in items:
            function_id = item.get('function_id')
            log_info(f"RevEng.AI | Item: {function_id}")
            if item.get('status') != "completed":
                continue
            # Only report functions that were actually requested in this chunk.
            if function_id not in requested_ids:
                continue
            signature = self.make_signature(item.get('data_types'))
            if signature != "N/A":
                signatures.append({"nearest_neighbor_id": function_id, "signature": signature})

        return signatures
    except Exception as e:
        log_error(f"RevEng.AI | Error processing data type batch: {str(e)}")
        return []

def make_signature(self, data_types: Dict[str, Any]) -> str:
    """Build a ``"(arg_type, ...) return_type"`` string from a data-types record.

    Args:
        data_types: Mapping with a ``func_types`` entry. Argument types are
            read from ``func_types["header"]["args"]`` (a mapping whose
            values each carry a ``type``), and the trailing type from
            ``func_types["type"]``. Missing ``type`` fields render as
            ``"N/A"``.

    Returns:
        The formatted signature, or ``"N/A"`` if ``data_types`` does not
        have the expected structure (the error is logged).
    """
    try:
        func_types = data_types['func_types']
        args = func_types.get('header', {}).get('args', {})

        # str() mirrors f-string formatting, so non-string types still render.
        arg_types = ", ".join(str(arg.get('type', 'N/A')) for arg in args.values())
        signature = f"({arg_types}) {func_types.get('type', 'N/A')}"

        log_info(f"RevEng.AI | Signature: {signature}")
        return signature
    except Exception as e:
        log_error(f"RevEng.AI | Error making signature: {str(e)}")
        return "N/A"

def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tuple[bool, Dict[str, Any]]:
    """Fetch data types for the selected function matches.

    Splits ``selected_results`` into chunks of 50 and processes them
    concurrently (up to 4 worker threads) via ``_process_data_type_batch``.

    Args:
        bv: The current binary view (not used by this method).
        selected_results: Match results to fetch data types for.

    Returns:
        ``(True, {"success_count": int, "signatures": list})`` on success,
        with signatures ordered by input chunk. On failure the second
        element is an error message string rather than a dict:
        ``(False, "No valid functions selected")`` for empty input, or
        ``(False, str(error))`` for an unexpected exception.
    """
    try:
        log_info("RevEng.AI | Starting data type fetching")

        if not selected_results:
            return False, "No valid functions selected"

        chunk_size = 50
        # Slicing already yields a single chunk when there are fewer than
        # chunk_size results, so no special case is needed.
        chunks = [
            selected_results[i:i + chunk_size]
            for i in range(0, len(selected_results), chunk_size)
        ]

        log_info(f"RevEng.AI | Processing {len(selected_results)} functions in {len(chunks)} chunks of size {chunk_size}")

        # One slot per chunk so the final order follows the input order,
        # not the order in which worker threads happen to finish.
        results_by_chunk = [[] for _ in chunks]

        with ThreadPoolExecutor(max_workers=4) as executor:
            future_to_chunk = {
                executor.submit(self._process_data_type_batch, chunk): i
                for i, chunk in enumerate(chunks)
            }

            for future in as_completed(future_to_chunk):
                chunk_index = future_to_chunk[future]
                try:
                    results_by_chunk[chunk_index] = future.result()
                    log_info(f"RevEng.AI | Chunk {chunk_index} completed")
                except Exception as e:
                    # A failed chunk contributes nothing; the others still count.
                    log_error(f"RevEng.AI | Error processing chunk {chunk_index}: {str(e)}")

        signatures = [
            signature
            for batch in results_by_chunk
            for signature in batch
        ]

        options = {
            "success_count": len(signatures),
            "signatures": signatures,
        }

        return True, options
    except Exception as e:
        log_error(f"RevEng.AI | Error fetching data types: {str(e)}")
        return False, str(e)
0 commit comments