Skip to content

Commit 7472f18

Browse files
committed
finishing datatypes
1 parent 4a775f3 commit 7472f18

5 files changed

Lines changed: 190 additions & 167 deletions

File tree

revengai/features/match_functions/match_functions.py

Lines changed: 57 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
import time
88
from libbs.artifacts import _art_from_dict
99
from libbs.api import DecompilerInterface
10-
from libbs.decompilers.binja.interface import BinjaInterface as BNInterface
10+
from libbs.decompilers.binja.interface import BinjaInterface
1111
from concurrent.futures import ThreadPoolExecutor, as_completed
1212
from revengai.utils import rename_function as rename_function_util
13+
from revengai.utils import apply_data_types as apply_data_types_util
1314
from libbs.artifacts import (
1415
Function,
1516
FunctionArgument,
@@ -71,6 +72,7 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], co
7172
"icon_path": f"{os.path.dirname(__file__)}/../../images/failed.png",
7273
"icon_text": "Failed",
7374
"original_name": "N/A",
75+
"demangled_name": result['nearest_neighbor_function_name'] if result['nearest_neighbor_function_name'] else result['nearest_neighbor_function_name_mangled'],
7476
"matched_name": result['nearest_neighbor_function_name_mangled'] if result['nearest_neighbor_function_name_mangled'] else result['nearest_neighbor_function_name'],
7577
"signature": "N/A",
7678
"matched_binary": result['nearest_neighbor_binary_name'],
@@ -175,6 +177,7 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
175177
"icon_path": f"{os.path.dirname(__file__)}/../../images/failed.png",
176178
"icon_text": "Failed",
177179
"original_name": function.name,
180+
"demangled_name": "N/A",
178181
"matched_name": "N/A",
179182
"signature": "N/A",
180183
"matched_binary": "N/A",
@@ -351,24 +354,63 @@ def build_items(items_list: List[Dict[str, Any]], item_type: str) -> List[Tuple]
351354
log_error("Getting collections failed. Reason: %s", str(e))
352355
return False, str(e)
353356

354-
def rename_functions(self, bv: BinaryView, selected_results: List[Dict]) -> List[Dict]:
355-
"""Rename functions from the binary against RevEng.AI database"""
357+
def _process_rename_batch(self, chunk: List[Dict], bv: BinaryView, deci: DecompilerInterface = None) -> Tuple[int, int]:
356358
try:
357-
log_info("RevEng.AI | Starting function renaming")
358-
359+
log_info(f"RevEng.AI | Processing chunk of {len(chunk)} functions")
359360
renamed_count = 0
360-
for result in selected_results:
361-
# Convert function_address from string to int
361+
datatype_count = 0
362+
for result in chunk:
362363
try:
363364
addr = int(result['function_address'])
365+
if rename_function_util(bv, addr, result['matched_name']):
366+
renamed_count += 1
367+
if result.get('signature_data', None) is not None:
368+
log_info(f"RevEng.AI | Applying data types for 0x{addr:x}")
369+
if deci is not None:
370+
try:
371+
apply_data_types_util(addr, result['signature_data'], deci)
372+
datatype_count += 1
373+
log_info(f"RevEng.AI | Successfully applied data types for 0x{addr:x}")
374+
except Exception as e:
375+
log_error(f"RevEng.AI | Failed to apply data types for 0x{addr:x}: {str(e)}")
376+
364377
except (ValueError, TypeError):
365378
log_error(f"RevEng.AI | Invalid function address: {result}")
366379
continue
367380

368-
if rename_function_util(bv, addr, result['matched_name']):
369-
renamed_count += 1
381+
return renamed_count, datatype_count
382+
383+
except Exception as e:
384+
log_error(f"RevEng.AI | Error processing rename batch: {str(e)}")
385+
return 0, 0
386+
387+
def rename_functions(self, bv: BinaryView, selected_results: List[Dict]) -> List[Dict]:
388+
"""Rename functions from the binary against RevEng.AI database"""
389+
try:
390+
log_info("RevEng.AI | Starting function renaming")
391+
total_renamed_count = 0
392+
chunk_size = 50
393+
deci = DecompilerInterface.discover(force_decompiler="binja", bv=bv)
394+
chunks = [selected_results[i:i + chunk_size] for i in range(0, len(selected_results), chunk_size)]
395+
396+
log_info(f"RevEng.AI | Processing {len(selected_results)} functions in {len(chunks)} chunks of size {chunk_size}")
397+
398+
with ThreadPoolExecutor(max_workers=4) as executor:
399+
future_to_chunk = {
400+
executor.submit(self._process_rename_batch, chunk, bv, deci): i
401+
for i, chunk in enumerate(chunks)
402+
}
370403

371-
success_message = f"Successfully renamed {renamed_count} functions!" if renamed_count > 0 else "No functions were renamed!"
404+
for future in as_completed(future_to_chunk):
405+
chunk_index = future_to_chunk[future]
406+
try:
407+
renamed_count, datatype_count = future.result()
408+
total_renamed_count += renamed_count
409+
log_info(f"RevEng.AI | Chunk {chunk_index} completed: renamed {renamed_count} functions, applied {datatype_count} data types")
410+
except Exception as e:
411+
log_error(f"RevEng.AI | Error processing chunk {chunk_index}: {str(e)}")
412+
413+
success_message = f"Successfully renamed {total_renamed_count} functions!" if total_renamed_count > 0 else "No functions were renamed!"
372414

373415
log_info(f"RevEng.AI | {success_message}")
374416

@@ -377,7 +419,7 @@ def rename_functions(self, bv: BinaryView, selected_results: List[Dict]) -> List
377419
log_error(f"RevEng.AI | Error renaming functions: {str(e)}")
378420
return False, str(e)
379421

380-
def _process_data_type_batch(self, chunk: List[Dict]) -> List[Dict]:
422+
def _process_data_type_batch(self, chunk: List[Dict], chunk_index: int) -> List[Dict]:
381423
try:
382424
log_info(f"RevEng.AI | Processing chunk of {len(chunk)} functions")
383425
function_ids = set([result['nearest_neighbor_id'] for result in chunk])
@@ -391,7 +433,7 @@ def _process_data_type_batch(self, chunk: List[Dict]) -> List[Dict]:
391433
data = response.get("data", {})
392434
items = data.get("items", [])
393435
pending_count = sum(1 for item in items if item.get("status") == "pending")
394-
log_info(f"RevEng.AI | {pending_count} items still pending...")
436+
log_info(f"RevEng.AI | [Chunk {chunk_index}] {pending_count} items still pending...")
395437
if not pending_count:
396438
break
397439
time.sleep(3)
@@ -436,13 +478,11 @@ def function_arguments(self, fnc: Function) -> list[str]:
436478
)
437479
return args
438480

439-
440481
def function_to_str(self, fnc: Function) -> str:
441482
# convert the signature to a string representation
442483
return f"{fnc.type} {fnc.name}"\
443484
f"({', '.join(self.function_arguments(fnc))})"
444-
445-
485+
446486
def make_signature(self, data_types: List[Dict]) -> str:
447487
try:
448488
#log_info(f"RevEng.AI | Making signature for {data_types}")
@@ -479,7 +519,7 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
479519

480520
with ThreadPoolExecutor(max_workers=4) as executor:
481521
future_to_chunk = {
482-
executor.submit(self._process_data_type_batch, chunk): i
522+
executor.submit(self._process_data_type_batch, chunk, i): i
483523
for i, chunk in enumerate(chunks)
484524
}
485525

@@ -501,123 +541,4 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
501541
return True, options
502542
except Exception as e:
503543
log_error(f"RevEng.AI | Error fetching data types: {str(e)}")
504-
return False, str(e)
505-
506-
def test(self, bv: BinaryView, selected_results: List[Dict]) -> Tuple[bool, Dict[str, Any]]:
507-
try:
508-
log_info("RevEng.AI | Starting test")
509-
510-
decompiler = DecompilerInterface.discover(force_decompiler="binja")
511-
512-
log_info(f"RevEng.AI | Decompiler: {decompiler}")
513-
log_info(f"RevEng.AI | Type: {type(decompiler)}")
514-
515-
for addr, func in decompiler.functions.items():
516-
log_info(f"RevEng.AI | {addr}: {func}")
517-
518-
519-
log_info(f"RevEng.AI | Decompiler: {decompiler}")
520-
for result in selected_results:
521-
if result.get('signature_data', None) is not None:
522-
log_info(f"RevEng.AI | Testing 0x{result['function_address']:x}")
523-
self._apply_data_types(result['function_address'], result['signature_data'], decompiler)
524-
525-
526-
527-
return True, "Test completed"
528-
except Exception as e:
529-
log_error(f"RevEng.AI | Error testing: {str(e)}")
530-
return False, str(e)
531-
532-
def _apply_type(
533-
self,
534-
deci: DecompilerInterface,
535-
artifact,
536-
soft_skip=False
537-
) -> None | str:
538-
supported_types = [
539-
Function,
540-
GlobalVariable,
541-
Enum,
542-
Struct,
543-
Typedef
544-
]
545-
546-
if not any(isinstance(artifact, t) for t in supported_types):
547-
return "Unsupported artifact type: " \
548-
f"{artifact.__class__.__name__}"
549-
550-
try:
551-
log_info(f"RevEng.AI | Applying artifact: {artifact.name}")
552-
log_info(f"RevEng.AI | Artifact type: {artifact.__class__.__name__}")
553-
log_info(f"RevEng.AI | Artifact Name: {artifact.name}")
554-
log_info(f"RevEng.AI | Decompiler: {deci}")
555-
if isinstance(artifact, Function):
556-
deci.functions[artifact.addr] = artifact
557-
elif isinstance(artifact, GlobalVariable):
558-
deci.global_vars[artifact.addr] = artifact
559-
elif isinstance(artifact, Enum):
560-
deci.enums[artifact.name] = artifact
561-
elif isinstance(artifact, Struct):
562-
deci.structs[artifact.name] = artifact
563-
elif isinstance(artifact, Typedef):
564-
deci.typedefs[artifact.name] = artifact
565-
566-
567-
568-
except Exception as e:
569-
log_error(f"Error while applying artifact '{artifact.name}'"
570-
f" of type {artifact.__class__.__name__}: {e}")
571-
if not soft_skip:
572-
return f"Error while applying artifact '{artifact.name}'"\
573-
f" of type {artifact.__class__.__name__}: {e}"
574-
575-
return None
576-
577-
def _apply_types(self, deci: DecompilerInterface, artifacts: List) -> None | str:
578-
for artifact in artifacts:
579-
error = self._apply_type(deci, artifact, True)
580-
if error is not None:
581-
return error
582-
return None
583-
584-
def _load_many_artifacts_from_list(self, artifacts: list[dict]) -> list:
585-
_artifacts = []
586-
for artifact in artifacts:
587-
art = _art_from_dict(artifact)
588-
if art is not None:
589-
_artifacts.append(art)
590-
return _artifacts
591-
592-
def _apply_data_types(self, function_addr: int = 0,
593-
signature=None,
594-
deci: DecompilerInterface = None,) -> None:
595-
if not deci:
596-
log_error("RevEng.AI | Unable to find a decompiler")
597-
return
598-
599-
try:
600-
# get the function signature from the table
601-
function: Function = signature.get("function")
602-
deps = signature.get("deps")
603-
604-
function.addr = function_addr
605-
606-
# fisrt apply the dependencies
607-
res = self._apply_types(deci, self._load_many_artifacts_from_list(deps))
608-
if res is not None:
609-
log_error(
610-
f"Failed to apply function dependencies: {res}")
611-
return
612-
613-
# then apply the function signature
614-
res = self._apply_type(deci, function)
615-
if res is not None:
616-
log_error(f"Failed to apply function signature: {res}")
617-
return
618-
619-
# show success message
620-
log_info("Successfully applied function signature and dependencies")
621-
622-
except Exception as e:
623-
log_error(f"Error: {e}")
544+
return False, str(e)

revengai/features/match_functions/match_functions_dialog.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,8 @@ def create_footer_layout(self):
7777
self.fetch_results_button = QPushButton("Fetch Results")
7878
self.fetch_data_types_button = QPushButton("Fetch Data Types")
7979
self.rename_selected_button = QPushButton("Rename Selected")
80-
self.test_button = QPushButton("Test")
8180

82-
for button in [self.fetch_results_button, self.fetch_data_types_button, self.rename_selected_button, self.test_button]:
81+
for button in [self.fetch_results_button, self.fetch_data_types_button, self.rename_selected_button]:
8382
button.setStyleSheet("""
8483
QPushButton {
8584
background-color: #6c757d;
@@ -95,7 +94,6 @@ def create_footer_layout(self):
9594
self.fetch_results_button.clicked.connect(self.start_matching)
9695
self.fetch_data_types_button.clicked.connect(self.start_fetching_data_types)
9796
self.rename_selected_button.clicked.connect(self.start_renaming)
98-
self.test_button.clicked.connect(self.start_test)
9997

10098
for button in [self.fetch_data_types_button, self.rename_selected_button]:
10199
button.setEnabled(False)
@@ -111,34 +109,10 @@ def create_footer_layout(self):
111109
buttons_layout.addWidget(self.fetch_results_button)
112110
buttons_layout.addWidget(self.fetch_data_types_button)
113111
buttons_layout.addWidget(self.rename_selected_button)
114-
buttons_layout.addWidget(self.test_button)
115112

116113
footer_layout.addLayout(buttons_layout)
117114
return footer_layout
118115

119-
def start_test(self):
120-
log_info("RevEng.AI | Starting test process")
121-
122-
self.progress = create_progress_dialog(self, "RevEng.AI Test", "Testing...")
123-
self.progress.show()
124-
QCoreApplication.processEvents()
125-
self.status_label.setText("Testing...")
126-
127-
self.test_thread = DataThread(self.match_functions.test, self.bv, self.results_tab.selected_results)
128-
self.test_thread.finished.connect(self.on_test_finished)
129-
self.test_thread.start()
130-
131-
def on_test_finished(self, success, data):
132-
self.progress.close()
133-
134-
if success:
135-
log_info(f"RevEng.AI | Test completed: {data}")
136-
QMessageBox.information(self, "RevEng.AI Test", f"{data}", QMessageBox.Ok)
137-
else:
138-
log_error(f"RevEng.AI | Test failed: {data}")
139-
self.status_label.setText(f"Test failed: {data}")
140-
QMessageBox.critical(self, "RevEng.AI Test Error", f"Failed to test:\n{data}", QMessageBox.Ok)
141-
142116
def start_matching(self):
143117
confidence_threshold = self.confidenceSlider.value()
144118

revengai/features/match_functions/tab_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def populate_results_table(self):
8585

8686
column_data = [
8787
"original_name",
88-
"matched_name",
88+
"demangled_name",
8989
"signature",
9090
"matched_binary",
9191
"similarity",

revengai/utils/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .periodic_check import PeriodicChecker
22
from .base_auth_feature import BaseAuthFeature
33
from .progress_dialog import create_progress_dialog, create_cancellable_progress_dialog
4-
from .utils import rename_function, parse_date
4+
from .utils import rename_function, parse_date, apply_data_types
55

6-
__all__ = ['PeriodicChecker', 'BaseAuthFeature', 'create_progress_dialog', 'create_cancellable_progress_dialog', 'rename_function', 'parse_date']
6+
__all__ = ['PeriodicChecker', 'BaseAuthFeature', 'create_progress_dialog', 'create_cancellable_progress_dialog', 'rename_function', 'parse_date', 'apply_data_types']

0 commit comments

Comments
 (0)