77import re
88import time
99from concurrent .futures import ThreadPoolExecutor , as_completed
10+ from libbs .artifacts import _art_from_dict
11+ from libbs .api import DecompilerInterface
12+ from libbs .decompilers .binja .interface import BinjaInterface
13+ from threading import Event
1014from revengai .utils import rename_function as rename_function_util
15+ from revengai .utils import apply_data_types as apply_data_types_util
16+ from libbs .artifacts import (
17+ Function ,
18+ FunctionArgument ,
19+ GlobalVariable ,
20+ Enum ,
21+ Struct ,
22+ Typedef ,
23+ )
1124
1225class MatchCurrentFunction :
1326 def __init__ (self , config ):
@@ -18,6 +31,7 @@ def __init__(self, config):
1831 self .analyzed_functions = []
1932 self .filtered_collections = []
2033 self .filtered_binaries = []
34+ self .cancelled = Event ()
2135
2236 def search_collections (self , bv : BinaryView , search_term : str = "" ):
2337 try :
@@ -129,13 +143,13 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
129143 function_addr = options .get ("function" , None )
130144 result = { "matched" : 0 , "skipped" : 0 , "data" : [] }
131145
132- functions = bv .get_functions_containing (function_addr )
146+ functions_containing = bv .get_functions_containing (function_addr )
133147
134- if not functions :
148+ if not functions_containing :
135149 log_error (f"RevEng.AI | Function not found at 0x{ function_addr :x} " )
136150 raise Exception ("Function not found at address" )
137151
138- function = functions [0 ]
152+ function = functions_containing [0 ]
139153 log_info (f"RevEng.AI | Function: { function .name } at 0x{ function .start :x} " )
140154
141155 filtered_collections = []
@@ -150,7 +164,7 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
150164 log_info (f"RevEng.AI | Selected collections: { selected_collections } " )
151165 log_info (f"RevEng.AI | Debug symbols: { debug_symbols } " )
152166 log_info (f"RevEng.AI | Debug symbols count: { debug_symbols_count } " )
153- log_info (f"RevEng.AI | Function : 0x{ function_addr :x} " )
167+ log_info (f"RevEng.AI | Clicked address : 0x{ function_addr :x} " )
154168
155169 binary_id = self .config .get_binary_id (bv )
156170 if not binary_id :
@@ -173,21 +187,43 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
173187 binaries = filtered_binaries ,
174188 nns = debug_symbols_count
175189 ).json ()["function_matches" ]
190+
191+ functions = []
192+ for function_by_distance in functions_by_distance :
193+ functions .append ({"function_id" : function_by_distance ['origin_function_id' ], "function_name" : function_by_distance ['nearest_neighbor_function_name' ]})
194+ if len (functions ) == 0 :
195+ return 0 , []
196+ functions_by_score = RE_name_score (functions ).json ()["data" ]
197+ if len (functions_by_score ) == 0 :
198+ return 0 , []
199+
176200 results = []
177- for result in functions_by_distance :
201+ for function_by_distance in functions_by_distance :
178202 try :
203+
204+ matched_name = function_by_distance ['nearest_neighbor_function_name' ] if function_by_distance ['nearest_neighbor_function_name' ] else function_by_distance ['nearest_neighbor_function_name_mangled' ]
205+ matched_name_mangled = function_by_distance ['nearest_neighbor_function_name_mangled' ] if function_by_distance ['nearest_neighbor_function_name_mangled' ] else function_by_distance ['nearest_neighbor_function_name' ]
206+
207+ functions = [{"function_id" : function_by_distance ['origin_function_id' ], "function_name" : matched_name_mangled }]
208+ functions_by_score = RE_name_score (functions ).json ()["data" ]
209+ function_by_score = next ((f for f in functions_by_score if f ['function_id' ] == function_by_distance ['origin_function_id' ]), None )
210+
211+ confidence = function_by_score .get ('box_plot' , {}).get ('average' , 0 ) if function_by_score else 0
212+
179213 results .append ({
180- "original_name" : function .name ,
181- "matched_name" : result ['nearest_neighbor_function_name_mangled' ] if result ['nearest_neighbor_function_name_mangled' ] else result ['nearest_neighbor_function_name' ],
214+ "original_name" : function .name if hasattr (function , 'name' ) else 'Unknown' ,
215+ "matched_name" : matched_name ,
216+ "matched_name_mangled" : matched_name_mangled ,
182217 "signature" : "N/A" ,
183- "matched_binary" : result ['nearest_neighbor_binary_name' ],
184- "similarity" : f"{ (result ['confidence' ] * 100 ):.2f} %" ,
185- "nearest_neighbor_id" : result ['nearest_neighbor_id' ],
186- "function_address" : function .start
218+ "matched_binary" : function_by_distance ['nearest_neighbor_binary_name' ],
219+ "similarity" : f"{ (function_by_distance ['confidence' ] * 100 ):.2f} %" ,
220+ "confidence" : f"{ confidence :.2f} %" ,
221+ "nearest_neighbor_id" : function_by_distance ['nearest_neighbor_id' ],
222+ "function_address" : function .start if hasattr (function , 'start' ) else 0
187223 })
188224
189225 except Exception as e :
190- log_error (f"RevEng.AI | Error processing function { result [ 'origin_function_id' ] } : { str (e )} " )
226+ log_error (f"RevEng.AI | Error processing function { function_by_distance . get ( 'origin_function_id' , 'Unknown' ) } : { str (e )} " )
191227 return True , results
192228
193229 except Exception as e :
@@ -312,22 +348,25 @@ def rename_function(self, bv: BinaryView, selected_result: Dict) -> List[Dict]:
312348
313349 renamed_count = 0
314350 failed_count = 0
315-
351+ deci = BinjaInterface ( bv )
316352 function_address = selected_result .get ("function_address" )
317- new_name = selected_result .get ("matched_name " )
353+ new_name = selected_result .get ("matched_name_mangled " )
318354
319355 if not function_address or not new_name :
320356 log_error (f"RevEng.AI | Missing function address or name for rename" )
321357 failed_count += 1
322358 return False , "Missing function address or name for rename"
323359
324360 if rename_function_util (bv , function_address , new_name ):
325- renamed_count += 1
326- log_info (f"RevEng.AI | Successfully renamed function at { function_address :x} to { new_name } " )
327- else :
328- failed_count += 1
329- log_error (f"RevEng.AI | Failed to rename function at { function_address :x} " )
330-
361+ renamed_count += 1
362+ if selected_result .get ('signature_data' , None ) is not None :
363+ log_info (f"RevEng.AI | Applying data types for 0x{ function_address :x} " )
364+ if deci is not None :
365+ try :
366+ apply_data_types_util (function_address , selected_result ['signature_data' ], deci )
367+ log_info (f"RevEng.AI | Successfully applied data types for 0x{ function_address :x} " )
368+ except Exception as e :
369+ log_error (f"RevEng.AI | Failed to apply data types for 0x{ function_address :x} : { str (e )} " )
331370
332371 message = f"Successfully renamed { renamed_count } functions"
333372 if failed_count > 0 :
@@ -340,35 +379,58 @@ def rename_function(self, bv: BinaryView, selected_result: Dict) -> List[Dict]:
340379 log_error (f"RevEng.AI | Error in function renaming: { str (e )} " )
341380 return False , str (e )
342381
343- def _process_data_type_batch (self , chunk : List [Dict ]) -> List [Dict ]:
382+ def _process_data_type_batch (self , chunk : List [Dict ], chunk_index : int ) -> List [Dict ]:
344383 try :
345384 log_info (f"RevEng.AI | Processing chunk of { len (chunk )} functions" )
346385 function_ids = set ([result ['nearest_neighbor_id' ] for result in chunk ])
386+ log_info (f"RevEng.AI | Cancelled: { self .cancelled .is_set ()} " )
387+ if self .cancelled .is_set ():
388+ return []
389+
347390 RE_functions_data_types (function_ids = list (function_ids ))
391+ log_info (f"RevEng.AI | Cancelled: { self .cancelled .is_set ()} " )
392+ if self .cancelled .is_set ():
393+ return []
348394 signatures = []
349395 items = []
350396 while True :
397+ if self .cancelled .is_set ():
398+ return []
399+
351400 response = RE_functions_data_types_poll (
352401 function_ids = list (function_ids ),
353402 ).json ()
354403 data = response .get ("data" , {})
355404 items = data .get ("items" , [])
356-
357405 pending_count = sum (1 for item in items if item .get ("status" ) == "pending" )
358- log_info (f"RevEng.AI | { pending_count } items still pending... trying again " )
406+ log_info (f"RevEng.AI | [Chunk { chunk_index } ] { pending_count } items still pending..." )
359407 if not pending_count :
360408 break
361409 time .sleep (3 )
362410
363411 for item in items :
412+ log_info (f"RevEng.AI | Cancelled: { self .cancelled .is_set ()} " )
413+ if self .cancelled .is_set ():
414+ return []
364415 log_info (f"RevEng.AI | Item: { item ['function_id' ]} " )
365416 if item ['status' ] != "completed" :
366417 continue
367418 for result in chunk :
368419 if result ['nearest_neighbor_id' ] == item ['function_id' ]:
369- signature = self .make_signature (item ['data_types' ])
370- if signature != "N/A" :
371- signatures .append ({"nearest_neighbor_id" : result ['nearest_neighbor_id' ], "signature" : signature })
420+ signature = "N/A"
421+ item2 = item .get ("data_types" , {})
422+ func_types = item2 .get ("func_types" , None )
423+ func_deps = item2 .get ("func_deps" , [])
424+ log_info (f"RevEng.AI | Func types: { func_types } " )
425+ if func_types is not None :
426+ fnc : Function = _art_from_dict (func_types )
427+ if fnc .name is None :
428+ log_info (f"Function { item ['function_id' ]} has no name, skipping signature application." )
429+ continue
430+ log_info (f"Applying signature for { fnc .name } " )
431+ signature = self .function_to_str (fnc )
432+ if signature != "N/A" :
433+ signatures .append ({"nearest_neighbor_id" : result ['nearest_neighbor_id' ], "signature" : signature , "data_types" : item ['data_types' ], "signature_data" : {"deps" : func_deps , "function" : fnc }})
372434 break
373435
374436 #log_info(f"RevEng.AI | Total count: {total_count}")
@@ -413,10 +475,12 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
413475 log_info (f"RevEng.AI | Processing { len (selected_results )} functions in { len (chunks )} chunks of size { chunk_size } " )
414476
415477 signatures = []
478+ if self .cancelled .is_set ():
479+ return False , "Operation cancelled"
416480
417481 with ThreadPoolExecutor (max_workers = 4 ) as executor :
418482 future_to_chunk = {
419- executor .submit (self ._process_data_type_batch , chunk ): i
483+ executor .submit (self ._process_data_type_batch , chunk , i ): i
420484 for i , chunk in enumerate (chunks )
421485 }
422486
@@ -438,4 +502,26 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
438502 return True , options
439503 except Exception as e :
440504 log_error (f"RevEng.AI | Error fetching data types: { str (e )} " )
441- return False , str (e )
505+ return False , str (e )
506+
def function_arguments(self, fnc: Function) -> list[str]:
    """Render every argument of ``fnc`` as a ``"<type> <name>"`` string.

    Args:
        fnc: Function artifact whose ``header.args`` mapping holds the
            arguments to render.

    Returns:
        One ``"<type> <name>"`` string per value of ``fnc.header.args``,
        in the mapping's own iteration order. An empty list when the
        function has no header, or the header has no arguments.
    """
    # NOTE(review): presumably a partially populated artifact can come back
    # without a header (or with ``args`` unset); treat that as "no arguments"
    # rather than letting an AttributeError escape to the caller.
    header = getattr(fnc, "header", None)
    args = getattr(header, "args", None)
    if not args:
        return []
    return [f"{arg.type} {arg.name}" for arg in args.values()]
515+
def function_to_str(self, fnc: Function) -> str:
    """Format ``fnc`` as a one-line signature string.

    The result has the shape ``"<type> <name>(<arg>, <arg>, ...)"``, where
    the argument strings come from ``self.function_arguments(fnc)``.

    Args:
        fnc: Function artifact providing ``type`` and ``name``.

    Returns:
        The signature rendered as text.
    """
    # Build the "<type> <name>" prefix first, then the comma-separated
    # parameter list, and finally wrap the parameters in parentheses.
    prefix = f"{fnc.type} {fnc.name}"
    parameters = ", ".join(self.function_arguments(fnc))
    return f"{prefix}({parameters})"
520+
def cancel(self):
    """Request cancellation of the running operation.

    Logs the request, then sets the ``self.cancelled`` event. The batch
    and fetch routines in this file check that event via ``is_set()``.
    """
    log_info("RevEng.AI | Cancelling operation...")
    cancel_event = self.cancelled
    cancel_event.set()
524+
def clear_cancelled(self):
    """Reset the cancellation flag.

    Logs the reset, then clears the ``self.cancelled`` event so that
    subsequent ``is_set()`` checks report False again.
    """
    log_info("RevEng.AI | Clearing cancelled event...")
    cancel_event = self.cancelled
    cancel_event.clear()
0 commit comments