forked from pfalcon/ScratchABlock
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapply_xform.py
More file actions
executable file
·243 lines (202 loc) · 8.02 KB
/
apply_xform.py
File metadata and controls
executable file
·243 lines (202 loc) · 8.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/usr/bin/env python3
import sys
import argparse
import os.path
import glob
import logging
import yaml
import yamlutils
import core
from parser import *
import dot
from dataflow import *
from xform import *
from xform_utils import *
from decomp import *
from asmprinter import AsmPrinter
import cprinter
import progdb
# TODO: something above shadows "copy" otherwise
import copy
# Function database as seen by the current iteration; one_iter() rebinds
# this to progdb.FUNC_DB_BY_ADDR.
FUNC_DB = {}
# Deep copy of FUNC_DB taken by one_iter() before any file is processed;
# compared against FUNC_DB afterwards to detect whether the pass changed it.
FUNC_DB_ORG = {}
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    A few display switches are applied to the modules they control right
    here: ``--repr`` clears ``core.SimpleExpr.simple_repr``, ``--inst-addr``
    sets ``core.Inst.show_addr`` and ``--dot-inst`` sets ``dot.show_insts``.
    All other options are simply returned for the caller to act on.
    """
    parser = argparse.ArgumentParser(description="Parse PseudoC program, apply transformations, and dump result in various formats")
    add = parser.add_argument

    add("file", help="input file in PseudoC format, or directory of such files")
    add("-o", "--output", help="output file/dir (default stdout for single file, *.out for directory)")
    add("--arch", default="xtensa", help="architecture to use")
    add("--script", action="append", help="apply script from file")
    add("--iter", action="store_true", help="apply transform iteratively until no changes to funcdb")
    add("--funcdb", help="function database file (default: funcdb.yaml in input file's dir)")
    add("--format", choices=["none", "bblocks", "asm", "c"], default="bblocks",
        help="output format (default: %(default)s)")
    add("--output-suffix", metavar="SUFFIX", default=".out", help="suffix for output files in same-dir mode (default: .out)")

    # Plain on/off switches, registered in the order they show up in --help.
    switches = (
        ("--no-dead", "don't output DCE-eliminated instructions"),
        ("--no-comments", "don't output decompilation comments (annotations)"),
        ("--no-graph-header", "don't output graph properties"),
        ("--annotate-calls", "annotate calls with uses/defs"),
        ("--inst-addr", "output instruction addresses"),
        ("--dot-inst", "output instructions in .dot files"),
        ("--repr", "dump __repr__ format of instructions and other objects"),
        ("--debug", "produce debug files"),
    )
    for flag, description in switches:
        add(flag, action="store_true", help=description)

    add("--log-level", metavar="LEVEL", default="INFO", help="set logging level (default: %(default)s)")

    parsed = parser.parse_args()

    # Switches that are nothing more than a module-level display setting
    # are applied immediately.
    if parsed.repr:
        core.SimpleExpr.simple_repr = False
    if parsed.inst_addr:
        core.Inst.show_addr = True
    if parsed.dot_inst:
        import dot
        dot.show_insts = True

    return parsed
def handle_file(args):
    """Process ``args.file``, naming the file on stdout if processing fails.

    All work is delegated to handle_file_unprotected(). Any Exception it
    raises is reported with the offending file name and then re-raised, so
    the caller still sees the original error.
    """
    try:
        handle_file_unprotected(args)
    except Exception as exc:
        # Say which input was being processed before letting the error
        # propagate.
        failure_note = "Error while processing file: " + args.file
        print(failure_note)
        raise exc
def _dump_debug_snapshot(cfg, args, stage):
    """Write ``<file>.<stage>.bb`` and ``<file>.<stage>.dot`` for the CFG."""
    prefix = args.file + "." + stage
    with open(prefix + ".bb", "w") as f:
        dump_bblocks(cfg, f, no_graph_header=args.no_graph_header)
    with open(prefix + ".dot", "w") as f:
        dot.dot(cfg, f)


def _apply_embedded_script(cfg, script):
    """Run the (op_type, op_name) directives embedded in the input file."""
    for op_type, op_name in script:
        if op_type == "xform:":
            # Transformation applied to the CFG as a whole.
            globals()[op_name](cfg)
        elif op_type == "xform_bblock:":
            foreach_bblock(cfg, globals()[op_name])
        elif op_type == "xform_inst:":
            foreach_inst(cfg, globals()[op_name])
        elif op_type == "script:":
            __import__(op_name).apply(cfg)
        else:
            # Raised explicitly (rather than "assert 0") so the check is
            # not stripped under "python -O" and the bad directive is named.
            raise AssertionError("unknown script directive: %r" % (op_type,))


def _print_cfg(cfg, args, out):
    """Print the CFG to ``out`` in the format selected by ``args.format``."""
    if args.no_comments:
        Inst.show_comments = False

    if args.format == "bblocks":
        printer = CFGPrinter(cfg, out)
        if args.no_graph_header:
            printer.print_graph_header = lambda: None
        printer.inst_printer = repr if args.repr else str
        printer.no_dead = args.no_dead
        printer.print()
    elif args.format == "asm":
        printer = AsmPrinter(cfg, out)
        printer.no_dead = args.no_dead
        printer.print()
    elif args.format == "c":
        cfg.number_postorder()
        Inst.trail = ";"
        cprinter.no_dead = args.no_dead
        cprinter.dump_c(cfg, out)
    # Any other format (i.e. "none") prints nothing.


def handle_file_unprotected(args):
    """Parse one PseudoC file, transform it and emit the requested output.

    Steps, in order:
      1. Parse ``args.file`` into a CFG; the parser is kept as ``cfg.parser``.
      2. With ``args.debug``, dump the CFG to ``<file>.0.bb`` / ``<file>.0.dot``.
      3. Apply transformations: the modules listed in ``args.script`` if
         given, otherwise the script embedded in the input file (if the
         parser exposes one as ``script``).
      4. With ``args.debug``, dump the CFG to ``<file>.out.bb`` /
         ``<file>.out.dot``.
      5. Print the CFG in ``args.format`` to ``args.output``, or to stdout
         when no output is given or the format is "none".
      6. Pass the CFG to ``progdb.update_funcdb()``.

    The output file is closed even when a printer raises.

    Returns:
        The transformed CFG.

    Raises:
        AssertionError: an embedded script directive has an unknown type.
        Other exceptions from parsing, transforming or printing propagate.
    """
    parser = Parser(args.file)
    cfg = parser.parse()
    cfg.parser = parser

    # Trailing jumps are deliberately left in place for every output format:
    # removing them up front is unsafe during dataflow analysis.

    if args.debug:
        _dump_debug_snapshot(cfg, args, "0")

    if args.script:
        for script_name in args.script:
            __import__(script_name).apply(cfg)
    elif hasattr(parser, "script"):
        _apply_embedded_script(cfg, parser.script)

    if args.debug:
        _dump_debug_snapshot(cfg, args, "out")

    to_file = bool(args.output) and args.format != "none"
    out = open(args.output, "w") if to_file else sys.stdout
    try:
        _print_cfg(cfg, args, out)
    finally:
        # Never close stdout; do close our own file, even on error.
        if to_file:
            out.close()

    progdb.update_funcdb(cfg)

    return cfg
def one_iter(input, output, iter_no):
    """Run one pass over the input and report whether the function DB changed.

    Args:
        input: a single input file, or a directory whose ``*.lst`` files
            are processed one by one.
        output: the user's output file/dir, or a false value. In directory
            mode without it, each result goes next to its input with
            ``args.output_suffix`` appended.
        iter_no: zero-based iteration number; only on iteration 0 is the
            ``<funcdb>.in`` file loaded as well (if it exists).

    Returns:
        True if FUNC_DB differs from the snapshot taken at the start of the
        pass. In that case the funcdb is also saved, unless ``args.funcdb``
        is "none".

    Relies on the module-level ``args`` namespace and rewrites ``args.file``
    / ``args.output`` for every file in directory mode.
    """
    global FUNC_DB, FUNC_DB_ORG

    use_funcdb = args.funcdb != "none"

    if use_funcdb:
        db_files = []
        seed_db = args.funcdb + ".in"
        if iter_no == 0 and os.path.exists(seed_db):
            db_files.append(seed_db)
        if os.path.exists(args.funcdb):
            db_files.append(args.funcdb)
        progdb.load_funcdb(*db_files)

    # NOTE(review): taken at function level, i.e. also when funcdb is
    # "none" - confirm against the upstream indentation.
    FUNC_DB = progdb.FUNC_DB_BY_ADDR
    FUNC_DB_ORG = copy.deepcopy(FUNC_DB)

    # If script has init() function, call it at the beginning of each
    # iteration, this is useful to reset some state. E.g., if some
    # funcdb property is calculated as a union, but we want to find
    # its lower bound, we need to reset it to empty set at each
    # iteration.
    for script_name in args.script or ():
        module = __import__(script_name)
        if hasattr(module, "init"):
            module.init()

    if not os.path.isdir(input):
        handle_file(args)
    else:
        if output and not os.path.isdir(output):
            os.makedirs(output)

        for full_name in glob.glob(input + "/*"):
            is_listing = full_name.endswith(".lst") and os.path.isfile(full_name)
            if not is_listing:
                continue

            if args.debug:
                print(full_name)

            args.file = full_name
            if output:
                # Same base name, placed in the output directory.
                args.output = output + "/" + full_name.rsplit("/", 1)[-1]
            else:
                # Same-dir mode: write next to the input file.
                args.output = full_name + args.output_suffix

            handle_file(args)

    changed = FUNC_DB != FUNC_DB_ORG
    if changed and use_funcdb:
        progdb.save_funcdb(args.funcdb)

    return changed
if __name__ == "__main__":
    # Script entry point: parse options, load the architecture and project
    # data, then run one_iter() once, or repeatedly with --iter until the
    # function database stops changing.
    args = parse_args()

    if args.log_level:
        # Level constants in the logging module are upper-case; accept the
        # name in any case on the command line.
        logging.basicConfig(level=getattr(logging, args.log_level.upper()))

    import arch
    arch.load_arch(args.arch)

    if args.annotate_calls:
        core.Inst.annotate_calls = True

    # proj_dir is needed below for binary data and the symtab, so it is
    # computed unconditionally. It used to be assigned only when --funcdb
    # was omitted, so passing --funcdb explicitly ended in a NameError.
    if os.path.isdir(args.file):
        # For an input as directory, use this *input* directory
        proj_dir = args.file
    else:
        # For a single file, use containing directory
        proj_dir = os.path.dirname(args.file) or "."

    if not args.funcdb:
        args.funcdb = proj_dir + "/funcdb.yaml"

    # NOTE(review): "log" is not defined in this file; presumably it comes
    # from one of the star imports - confirm.
    log.info("Using funcdb: %s", args.funcdb)

    # Load binary data
    import bindata
    bindata.init(proj_dir)

    # Load symtab
    symtab_path = proj_dir + "/symtab.txt"
    if os.path.exists(symtab_path):
        # The path must go through a %s placeholder; passing it as a bare
        # extra argument makes logging fail while formatting the record.
        log.info("Using symtab: %s", symtab_path)
        progdb.load_symtab(symtab_path)

    input = args.file
    # Snapshot the user's -o value now: one_iter() overwrites args.output
    # for every file when the input is a directory.
    output = args.output

    iter_no = 0
    while True:
        changed = one_iter(input, output, iter_no)
        if not args.iter:
            break
        if args.debug:
            print("=== Done iteration %d ===" % iter_no)
        if not changed:
            break
        iter_no += 1