Skip to content

Commit d686cfb

Browse files
committed
adding datatypes to autounstrip
1 parent 7472f18 commit d686cfb

9 files changed

Lines changed: 234 additions & 63 deletions

File tree

revengai/features/auto_unstrip/auto_unstrip.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ def __init__(self, config):
1313
self.path = None
1414
self.max_workers = 4
1515

16-
def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv: BinaryView) -> Tuple[int, List[str]]:
16+
def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv: BinaryView, debug_symbols: bool, data_types: bool) -> Tuple[int, List[str]]:
1717
try:
1818
functions_by_distance = RE_nearest_symbols_batch(
1919
function_ids=function_ids,
2020
distance=self.auto_unstrip_distance,
21-
debug_enabled=True,
21+
debug_enabled=debug_symbols,
2222
nns=1
2323
).json()["function_matches"]
2424

@@ -54,6 +54,7 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv
5454
log_info(f"RevEng.AI | Function {function['function_id']} has a score of {function['box_plot']['average']:.2f} for name {new_name_mangled}, renaming")
5555
if rename_function_util(bv, func_addr, new_name_mangled):
5656
renamed_count += 1
57+
5758
break
5859

5960

@@ -66,10 +67,13 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv
6667
except Exception as e:
6768
return 0, [str(e)]
6869

69-
def auto_unstrip(self, bv: BinaryView):
70+
def auto_unstrip(self, bv: BinaryView, options: Dict[str, Any]):
7071
try:
7172
log_info("RevEng.AI | Auto Unstripping binary")
7273

74+
debug_symbols = options.get("debug_symbols", True)
75+
data_types = options.get("data_types", False)
76+
7377
self.base_addr = bv.image_base
7478
self.path = bv.file.filename
7579
binary_id = self.config.get_binary_id(bv)
@@ -99,7 +103,7 @@ def auto_unstrip(self, bv: BinaryView):
99103
all_errors = []
100104
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
101105
future_to_chunk = {
102-
executor.submit(self._process_batch, chunk, id_to_addr, bv): i
106+
executor.submit(self._process_batch, chunk, id_to_addr, bv, debug_symbols, data_types): i
103107
for i, chunk in enumerate(chunks)
104108
}
105109

revengai/features/auto_unstrip/auto_unstrip_dialog.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from binaryninja import log_error
22
from PySide6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout,
3-
QPushButton, QLabel)
3+
QPushButton, QLabel, QCheckBox)
44
from PySide6.QtCore import Qt
55
from PySide6.QtGui import QPixmap
66
from PySide6.QtCore import QCoreApplication
@@ -43,6 +43,14 @@ def init_ui(self):
4343
header_layout.addLayout(info_layout, stretch=1)
4444

4545
layout.addLayout(header_layout)
46+
47+
self.debug_symbols_checkbox = QCheckBox("Limit Matches to Debug Symbols")
48+
self.debug_symbols_checkbox.setChecked(True)
49+
layout.addWidget(self.debug_symbols_checkbox)
50+
self.data_types_checkbox = QCheckBox("Get functions Data Types (renaming may take longer)")
51+
self.data_types_checkbox.setChecked(False)
52+
layout.addWidget(self.data_types_checkbox)
53+
4654
layout.addSpacing(20)
4755

4856
button_layout = QHBoxLayout()
@@ -76,9 +84,14 @@ def init_ui(self):
7684
def _auto_unstrip(self):
7785
self.progress = create_progress_dialog(self, "RevEng.AI Auto Unstrip", "Auto Unstripping binary...")
7886
self.progress.show()
79-
QCoreApplication.processEvents()
87+
QCoreApplication.processEvents()
88+
89+
options = {
90+
"debug_symbols": self.debug_symbols_checkbox.isChecked(),
91+
"data_types": self.data_types_checkbox.isChecked()
92+
}
8093

81-
self.auto_unstrip_thread = DataThread(self.auto_unstrip.auto_unstrip, self.bv)
94+
self.auto_unstrip_thread = DataThread(self.auto_unstrip.auto_unstrip, self.bv, options)
8295
self.auto_unstrip_thread.finished.connect(self._on_auto_unstrip_finished)
8396
self.auto_unstrip_thread.start()
8497

revengai/features/match_current_function/match_current_function.py

Lines changed: 114 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,20 @@
77
import re
88
import time
99
from concurrent.futures import ThreadPoolExecutor, as_completed
10+
from libbs.artifacts import _art_from_dict
11+
from libbs.api import DecompilerInterface
12+
from libbs.decompilers.binja.interface import BinjaInterface
13+
from threading import Event
1014
from revengai.utils import rename_function as rename_function_util
15+
from revengai.utils import apply_data_types as apply_data_types_util
16+
from libbs.artifacts import (
17+
Function,
18+
FunctionArgument,
19+
GlobalVariable,
20+
Enum,
21+
Struct,
22+
Typedef,
23+
)
1124

1225
class MatchCurrentFunction:
1326
def __init__(self, config):
@@ -18,6 +31,7 @@ def __init__(self, config):
1831
self.analyzed_functions = []
1932
self.filtered_collections = []
2033
self.filtered_binaries = []
34+
self.cancelled = Event()
2135

2236
def search_collections(self, bv: BinaryView, search_term: str = ""):
2337
try:
@@ -129,13 +143,13 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
129143
function_addr = options.get("function", None)
130144
result = { "matched": 0, "skipped": 0, "data": [] }
131145

132-
functions = bv.get_functions_containing(function_addr)
146+
functions_containing = bv.get_functions_containing(function_addr)
133147

134-
if not functions:
148+
if not functions_containing:
135149
log_error(f"RevEng.AI | Function not found at 0x{function_addr:x}")
136150
raise Exception("Function not found at address")
137151

138-
function = functions[0]
152+
function = functions_containing[0]
139153
log_info(f"RevEng.AI | Function: {function.name} at 0x{function.start:x}")
140154

141155
filtered_collections = []
@@ -150,7 +164,7 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
150164
log_info(f"RevEng.AI | Selected collections: {selected_collections}")
151165
log_info(f"RevEng.AI | Debug symbols: {debug_symbols}")
152166
log_info(f"RevEng.AI | Debug symbols count: {debug_symbols_count}")
153-
log_info(f"RevEng.AI | Function: 0x{function_addr:x}")
167+
log_info(f"RevEng.AI | Clicked address: 0x{function_addr:x}")
154168

155169
binary_id = self.config.get_binary_id(bv)
156170
if not binary_id:
@@ -173,21 +187,43 @@ def match_functions(self, bv: BinaryView, options: Dict[str, Any]) -> List[Dict]
173187
binaries=filtered_binaries,
174188
nns=debug_symbols_count
175189
).json()["function_matches"]
190+
191+
functions = []
192+
for function_by_distance in functions_by_distance:
193+
functions.append({"function_id": function_by_distance['origin_function_id'], "function_name": function_by_distance['nearest_neighbor_function_name']})
194+
if len(functions) == 0:
195+
return 0, []
196+
functions_by_score = RE_name_score(functions).json()["data"]
197+
if len(functions_by_score) == 0:
198+
return 0, []
199+
176200
results = []
177-
for result in functions_by_distance:
201+
for function_by_distance in functions_by_distance:
178202
try:
203+
204+
matched_name = function_by_distance['nearest_neighbor_function_name'] if function_by_distance['nearest_neighbor_function_name'] else function_by_distance['nearest_neighbor_function_name_mangled']
205+
matched_name_mangled = function_by_distance['nearest_neighbor_function_name_mangled'] if function_by_distance['nearest_neighbor_function_name_mangled'] else function_by_distance['nearest_neighbor_function_name']
206+
207+
functions = [{"function_id": function_by_distance['origin_function_id'], "function_name": matched_name_mangled}]
208+
functions_by_score = RE_name_score(functions).json()["data"]
209+
function_by_score = next((f for f in functions_by_score if f['function_id'] == function_by_distance['origin_function_id']), None)
210+
211+
confidence = function_by_score.get('box_plot', {}).get('average', 0) if function_by_score else 0
212+
179213
results.append({
180-
"original_name": function.name,
181-
"matched_name": result['nearest_neighbor_function_name_mangled'] if result['nearest_neighbor_function_name_mangled'] else result['nearest_neighbor_function_name'],
214+
"original_name": function.name if hasattr(function, 'name') else 'Unknown',
215+
"matched_name": matched_name,
216+
"matched_name_mangled": matched_name_mangled,
182217
"signature": "N/A",
183-
"matched_binary": result['nearest_neighbor_binary_name'],
184-
"similarity": f"{(result['confidence'] * 100):.2f}%",
185-
"nearest_neighbor_id": result['nearest_neighbor_id'],
186-
"function_address": function.start
218+
"matched_binary": function_by_distance['nearest_neighbor_binary_name'],
219+
"similarity": f"{(function_by_distance['confidence'] * 100):.2f}%",
220+
"confidence": f"{confidence:.2f}%",
221+
"nearest_neighbor_id": function_by_distance['nearest_neighbor_id'],
222+
"function_address": function.start if hasattr(function, 'start') else 0
187223
})
188224

189225
except Exception as e:
190-
log_error(f"RevEng.AI | Error processing function {result['origin_function_id']}: {str(e)}")
226+
log_error(f"RevEng.AI | Error processing function {function_by_distance.get('origin_function_id', 'Unknown')}: {str(e)}")
191227
return True, results
192228

193229
except Exception as e:
@@ -312,22 +348,25 @@ def rename_function(self, bv: BinaryView, selected_result: Dict) -> List[Dict]:
312348

313349
renamed_count = 0
314350
failed_count = 0
315-
351+
deci = BinjaInterface(bv)
316352
function_address = selected_result.get("function_address")
317-
new_name = selected_result.get("matched_name")
353+
new_name = selected_result.get("matched_name_mangled")
318354

319355
if not function_address or not new_name:
320356
log_error(f"RevEng.AI | Missing function address or name for rename")
321357
failed_count += 1
322358
return False, "Missing function address or name for rename"
323359

324360
if rename_function_util(bv, function_address, new_name):
325-
renamed_count += 1
326-
log_info(f"RevEng.AI | Successfully renamed function at {function_address:x} to {new_name}")
327-
else:
328-
failed_count += 1
329-
log_error(f"RevEng.AI | Failed to rename function at {function_address:x}")
330-
361+
renamed_count += 1
362+
if selected_result.get('signature_data', None) is not None:
363+
log_info(f"RevEng.AI | Applying data types for 0x{function_address:x}")
364+
if deci is not None:
365+
try:
366+
apply_data_types_util(function_address, selected_result['signature_data'], deci)
367+
log_info(f"RevEng.AI | Successfully applied data types for 0x{function_address:x}")
368+
except Exception as e:
369+
log_error(f"RevEng.AI | Failed to apply data types for 0x{function_address:x}: {str(e)}")
331370

332371
message = f"Successfully renamed {renamed_count} functions"
333372
if failed_count > 0:
@@ -340,35 +379,58 @@ def rename_function(self, bv: BinaryView, selected_result: Dict) -> List[Dict]:
340379
log_error(f"RevEng.AI | Error in function renaming: {str(e)}")
341380
return False, str(e)
342381

343-
def _process_data_type_batch(self, chunk: List[Dict]) -> List[Dict]:
382+
def _process_data_type_batch(self, chunk: List[Dict], chunk_index: int) -> List[Dict]:
344383
try:
345384
log_info(f"RevEng.AI | Processing chunk of {len(chunk)} functions")
346385
function_ids = set([result['nearest_neighbor_id'] for result in chunk])
386+
log_info(f"RevEng.AI | Cancelled: {self.cancelled.is_set()}")
387+
if self.cancelled.is_set():
388+
return []
389+
347390
RE_functions_data_types(function_ids=list(function_ids))
391+
log_info(f"RevEng.AI | Cancelled: {self.cancelled.is_set()}")
392+
if self.cancelled.is_set():
393+
return []
348394
signatures = []
349395
items = []
350396
while True:
397+
if self.cancelled.is_set():
398+
return []
399+
351400
response = RE_functions_data_types_poll(
352401
function_ids=list(function_ids),
353402
).json()
354403
data = response.get("data", {})
355404
items = data.get("items", [])
356-
357405
pending_count = sum(1 for item in items if item.get("status") == "pending")
358-
log_info(f"RevEng.AI | {pending_count} items still pending... trying again")
406+
log_info(f"RevEng.AI | [Chunk {chunk_index}] {pending_count} items still pending...")
359407
if not pending_count:
360408
break
361409
time.sleep(3)
362410

363411
for item in items:
412+
log_info(f"RevEng.AI | Cancelled: {self.cancelled.is_set()}")
413+
if self.cancelled.is_set():
414+
return []
364415
log_info(f"RevEng.AI | Item: {item['function_id']}")
365416
if item['status'] != "completed":
366417
continue
367418
for result in chunk:
368419
if result['nearest_neighbor_id'] == item['function_id']:
369-
signature = self.make_signature(item['data_types'])
370-
if signature != "N/A":
371-
signatures.append({"nearest_neighbor_id": result['nearest_neighbor_id'], "signature": signature})
420+
signature = "N/A"
421+
item2 = item.get("data_types", {})
422+
func_types = item2.get("func_types", None)
423+
func_deps = item2.get("func_deps", [])
424+
log_info(f"RevEng.AI | Func types: {func_types}")
425+
if func_types is not None:
426+
fnc: Function = _art_from_dict(func_types)
427+
if fnc.name is None:
428+
log_info(f"Function {item['function_id']} has no name, skipping signature application.")
429+
continue
430+
log_info(f"Applying signature for {fnc.name}")
431+
signature = self.function_to_str(fnc)
432+
if signature != "N/A":
433+
signatures.append({"nearest_neighbor_id": result['nearest_neighbor_id'], "signature": signature, "data_types": item['data_types'], "signature_data": {"deps": func_deps, "function": fnc}})
372434
break
373435

374436
#log_info(f"RevEng.AI | Total count: {total_count}")
@@ -413,10 +475,12 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
413475
log_info(f"RevEng.AI | Processing {len(selected_results)} functions in {len(chunks)} chunks of size {chunk_size}")
414476

415477
signatures = []
478+
if self.cancelled.is_set():
479+
return False, "Operation cancelled"
416480

417481
with ThreadPoolExecutor(max_workers=4) as executor:
418482
future_to_chunk = {
419-
executor.submit(self._process_data_type_batch, chunk): i
483+
executor.submit(self._process_data_type_batch, chunk, i): i
420484
for i, chunk in enumerate(chunks)
421485
}
422486

@@ -438,4 +502,26 @@ def fetch_data_types(self, bv: BinaryView, selected_results: List[Dict]) -> Tupl
438502
return True, options
439503
except Exception as e:
440504
log_error(f"RevEng.AI | Error fetching data types: {str(e)}")
441-
return False, str(e)
505+
return False, str(e)
506+
507+
def function_arguments(self, fnc: Function) -> list[str]:
    """Render each argument of ``fnc`` as a ``"<type> <name>"`` string.

    Args:
        fnc: Function artifact whose ``header.args`` mapping holds the
            argument objects; each one is read for ``type`` and ``name``.

    Returns:
        One string per argument, in the mapping's iteration order. The
        list is empty when the function has no header or no arguments.
    """
    header = getattr(fnc, "header", None)
    # A function without a header, or with an unset args mapping, simply
    # has nothing to render; return an empty list instead of raising.
    arg_map = getattr(header, "args", None) or {}
    return [f"{arg.type} {arg.name}" for arg in arg_map.values()]
515+
516+
def function_to_str(self, fnc: Function) -> str:
    """Return a one-line signature string for ``fnc``.

    The result has the form ``"<type> <name>(<arg>, <arg>, ...)"``, where
    the argument strings are produced by ``self.function_arguments``.
    """
    # Read the return type and name first, then render the arguments.
    prefix = f"{fnc.type} {fnc.name}"
    params = ", ".join(self.function_arguments(fnc))
    return f"{prefix}({params})"
520+
521+
def cancel(self):
    """Log the request, then set the ``cancelled`` event."""
    log_info("RevEng.AI | Cancelling operation...")
    # Code that polls ``self.cancelled.is_set()`` will see the flag after this.
    cancel_event = self.cancelled
    cancel_event.set()
524+
525+
def clear_cancelled(self):
    """Log the reset, then clear the ``cancelled`` event."""
    log_info("RevEng.AI | Clearing cancelled event...")
    # After this, ``self.cancelled.is_set()`` reports False again.
    cancel_event = self.cancelled
    cancel_event.clear()

revengai/features/match_current_function/match_current_function_dialog.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
QSplitter, QTextEdit, QProgressBar, QSlider)
77
from PySide6.QtCore import Qt, QTimer, QCoreApplication
88
from PySide6.QtGui import QIcon
9-
from revengai.utils import create_progress_dialog
9+
from revengai.utils import create_progress_dialog, create_cancellable_progress_dialog
1010
from revengai.utils.data_thread import DataThread
1111
from .tab_search import SearchTab
1212
from .tab_result import ResultTab
@@ -207,15 +207,16 @@ def start_fetching_data_types(self):
207207
try:
208208

209209
# Create and show progress dialog
210-
self.progress = create_progress_dialog(self, "RevEng.AI Fetch Data Types", "Fetching data types...")
210+
self.progress = create_cancellable_progress_dialog(self, "RevEng.AI Fetch Data Types", "Fetching data types...", self.match_current_function.cancel)
211211
self.progress.show()
212212
QCoreApplication.processEvents()
213213
self.status_label.setText("Fetching data types...")
214214

215215
self.fetch_data_types_thread = DataThread(
216216
self.match_current_function.fetch_data_types,
217217
self.bv,
218-
self.results_tab.current_matches
218+
self.results_tab.current_matches,
219+
self.match_current_function.clear_cancelled
219220
)
220221
self.fetch_data_types_thread.finished.connect(self.on_fetching_data_types_finished)
221222
self.fetch_data_types_thread.start()
@@ -289,6 +290,8 @@ def on_matching_finished(self, success, data):
289290
QMessageBox.Ok
290291
)
291292
else:
293+
self.results_tab.current_matches = []
294+
self.results_tab.populate_results_table()
292295
log_error(f"RevEng.AI | Current function matching failed: {data}")
293296
self.status_label.setText(f"Matching failed: {data}")
294297
QMessageBox.critical(

0 commit comments

Comments
 (0)