"""
mlperf inference benchmarking tool
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import array
import collections
import json
import logging
import os
import sys
import threading
import time
from queue import Queue
import subprocess
import mlperf_loadgen as lg
import numpy as np
from mlmodelscope.dataloader import DataLoader
from mlmodelscope.outputprocessor import OutputProcessor
from mlmodelscope.processor_name import get_cpu_name, get_gpu_name
import pydldataset
from tracer import Tracer
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("main")
NANO_SEC = 1e9
MILLI_SEC = 1000
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
SCENARIO_MAP = {
"SingleStream": lg.TestScenario.SingleStream,
"MultiStream": lg.TestScenario.MultiStream,
"Server": lg.TestScenario.Server,
"Offline": lg.TestScenario.Offline,
}
last_timeing = []
result_timeing = []
last_loaded = -1
TRACE_LEVEL = ( "NO_TRACE",
"APPLICATION_TRACE",
"MODEL_TRACE", # pipelines within model
"FRAMEWORK_TRACE", # layers within framework
"ML_LIBRARY_TRACE", # cudnn, ...
"SYSTEM_LIBRARY_TRACE", # cupti
"HARDWARE_TRACE", # perf, papi, ...
"FULL_TRACE") # includes all of the above)
BACKENDS = ("pytorch", "onnxruntime", "tensorflow", "mxnet", "jax")
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", default='cnn', choices=['coco', 'imagenet', 'squad', 'brats2019', 'cnn'], help="select accuracy script for dataset")
parser.add_argument("--scenario", default="SingleStream",
help="mlcommons inference benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())))
    # in MLPerf the default max-batchsize is 128, but some ONNX Runtime models only support a batch size of 1
parser.add_argument("--max_batchsize", type=int, default=1, help="max batch size in a single inference")
parser.add_argument("--backend", default='pytorch', choices=BACKENDS, help="runtime to use")
parser.add_argument("--task", type=str, nargs='?', default="summarization", help="The name of the task to predict.")
parser.add_argument("--model_name", type=str, nargs='?', default="gpt_j", help="The name of the model")
parser.add_argument("--qps", type=int, help="target qps")
# parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass")
parser.add_argument("--accuracy", default=True, help="enable accuracy pass")
parser.add_argument("--find_peak_performance", action="store_true", help="enable finding peak performance pass")
# file to use mlperf rules compliant parameters
parser.add_argument("--mlperf_conf", default="./inference/mlperf.conf", help="mlperf rules config")
# file for user LoadGen settings such as target QPS
# parser.add_argument("--user_conf", default="./inference/vision/classification_and_detection/user.conf", help="user config for user LoadGen settings such as target QPS")
parser.add_argument("--user_conf", default="./inference/language/gpt-j/user.conf", help="user config for user LoadGen settings such as target QPS")
# log path for loadgen
parser.add_argument("--log_dir", default='./logs')
# below will override mlperf rules compliant settings - don't use for official submission
parser.add_argument("--time", type=int, help="time to scan in seconds")
parser.add_argument("--count", type=int, default=10, help="dataset items to use")
parser.add_argument("--max_latency", type=float, help="mlperf max latency in pct tile")
parser.add_argument("--samples_per_query", type=int, help="mlperf multi-stream sample per query")
# MLHarness Parameters
parser.add_argument("--use_gpu", type=int, default=1, help="enable gpu for inference")
parser.add_argument("--gpu_id", type=int, default=0, help="which GPU")
parser.add_argument("--trace_level", type=str, nargs='?', default="NO_TRACE", choices=TRACE_LEVEL, help="MLModelScope Trace Level")
parser.add_argument("--gpu_trace", type=str, nargs='?', default="false", choices=["false", "true"], help="Whether to trace GPU activities")
parser.add_argument("--save_trace_result", type=str, nargs='?', default="false", choices=["false", "true"], help="Whether to save the trace result")
parser.add_argument("--save_trace_result_path", type=str, nargs='?', default="trace_result.txt", help="The path of the trace result file")
# py-mlmodelscope Parameters
parser.add_argument("--security_check", type=str, nargs='?', default="false", choices=["false", "true"], help="Whether to perform security check on the model file")
parser.add_argument("--config_file", type=str, nargs='?', default="false", choices=["false", "true"], help="Whether to use config file (.json)")
parser.add_argument("--config_file_path", type=str, nargs='?', default="config.json", help="The path of the config file")
# Modality Specific
# inv_map for object detection
parser.add_argument("--use_inv_map", action="store_true", help="use inv_map for object detection")
args = parser.parse_args()
    if args.scenario not in SCENARIO_MAP:
        parser.error("valid scenarios: " + str(list(SCENARIO_MAP.keys())))
return args
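# Example invocation (illustrative; dataset, model, and config paths depend on
# your local setup):
#   python3 mlharness_main.py --backend pytorch --task summarization \
#       --model_name gpt_j --dataset cnn --scenario SingleStream --count 10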
def main():
global last_timeing
global last_loaded
global result_timeing
args = get_args()
log.info(args)
    # --count applies to accuracy mode only and can be used to limit the number of
    # samples for testing. For perf mode we always limit count to 200.
count_override = False
count = args.count
if count:
count_override = True
# dataset to use
dataset_name = args.dataset
dataset = pydldataset.load(dataset_name, count=count)
# load model
backend = args.backend
task = args.task
model_name = args.model_name
config = None
if args.config_file == "true":
config_file_path = args.config_file_path
try:
with open(config_file_path, 'r') as f:
config = json.load(f)
print(f"config file {config_file_path} is loaded")
except (json.JSONDecodeError, FileNotFoundError) as e:
print(f"config file {config_file_path} is not loaded: {e}")
architecture = 'cpu' if args.use_gpu == 0 else 'gpu'
trace_level = args.trace_level
    gpu_trace = (TRACE_LEVEL.index(trace_level) >= TRACE_LEVEL.index("SYSTEM_LIBRARY_TRACE")) and (args.gpu_trace == "true")
    security_check = args.security_check == "true"
    save_trace_result = (args.save_trace_result == "true") and (trace_level != "NO_TRACE")
    save_trace_result_path = args.save_trace_result_path if save_trace_result else None
tracer, root_span, ctx = Tracer.create(trace_level=trace_level, save_trace_result_path=save_trace_result_path)
root_span.set_attribute("cpu_name", get_cpu_name())
c = None
if architecture == "gpu" and gpu_trace and tracer.is_trace_enabled("SYSTEM_LIBRARY_TRACE"):
root_span.set_attribute("gpu_name", get_gpu_name())
from pycupti import CUPTI
c = CUPTI(tracer=tracer)
print("CUPTI version", c.cuptiGetVersion())
output_processor = OutputProcessor()
user = 'default'
if backend == 'pytorch':
from mlmodelscope.pytorch_agent import PyTorch_Agent
agent = PyTorch_Agent(task, model_name, architecture, tracer, ctx, security_check, config, user, c)
elif backend == 'tensorflow':
from mlmodelscope.tensorflow_agent import TensorFlow_Agent
agent = TensorFlow_Agent(task, model_name, architecture, tracer, ctx, security_check, config, user)
elif backend == 'onnxruntime':
from mlmodelscope.onnxruntime_agent import ONNXRuntime_Agent
agent = ONNXRuntime_Agent(task, model_name, architecture, tracer, ctx, security_check, config, user)
elif backend == 'mxnet':
from mlmodelscope.mxnet_agent import MXNet_Agent
agent = MXNet_Agent(task, model_name, architecture, tracer, ctx, security_check, config, user)
elif backend == 'jax':
from mlmodelscope.jax_agent import JAX_Agent
agent = JAX_Agent(task, model_name, architecture, tracer, ctx, security_check, config, user)
else:
raise NotImplementedError(f"{backend} agent is not supported")
mlperf_conf = os.path.abspath(args.mlperf_conf)
if not os.path.exists(mlperf_conf):
log.error("{} not found".format(mlperf_conf))
sys.exit(1)
user_conf = os.path.abspath(args.user_conf)
if not os.path.exists(user_conf):
log.error("{} not found".format(user_conf))
sys.exit(1)
log_dir = None
if args.log_dir:
log_dir = os.path.abspath(args.log_dir)
os.makedirs(log_dir, exist_ok=True)
    #
    # query the dataset size and warm up the model before handing control to LoadGen
    #
count = dataset.get_item_count()
# warmup
dataset.load([0])
for _ in range(5):
img = dataset.get_samples([0])
# _ = backend.predict({backend.inputs[0]: img})
agent.predict(0, DataLoader(img, args.max_batchsize), output_processor)
dataset.unload(None)
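    # After warmup, sample loading is delegated to LoadGen's QSL callbacks
    # (dataset.load / dataset.unload), registered via lg.ConstructQSL() below.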
scenario = SCENARIO_MAP[args.scenario]
# for lg.ConstructSUT()
def issue_queries(query_samples):
global so
global last_timeing
global result_timeing
idx = np.array([q.index for q in query_samples]).astype(np.int32)
query_id = [q.id for q in query_samples]
if args.dataset == 'brats2019':
start = time.time()
response_array_refs = []
response = []
            for i, qid in enumerate(query_id):
                # brats2019 samples are large, so each query is predicted individually
                # processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = agent.predict(0, DataLoader(dataset.get_samples(idx[i][np.newaxis]), args.max_batchsize), output_processor, mlharness=True)
                # processed_results = json.loads(processed_results.decode('utf-8'))
                # prepend the sample index to each row of the single-sample result
                for j in range(len(processed_results[0])):
                    processed_results[0][j] = [idx[i]] + processed_results[0][j]
                response_array = array.array("B", np.array(processed_results[0], np.float16).tobytes())
                # keep a reference so the buffer stays alive until QuerySamplesComplete
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
result_timeing.append(time.time() - start)
lg.QuerySamplesComplete(response)
else:
start = time.time()
# processed_results = so.IssueQuery(len(idx), idx)
processed_results = agent.predict(0, DataLoader(dataset.get_samples(idx), args.max_batchsize), output_processor, mlharness=True)
result_timeing.append(time.time() - start)
# processed_results = json.loads(processed_results.decode('utf-8'))
response_array_refs = []
response = []
for index, qid in enumerate(query_id):
if args.dataset == 'coco':
for j in range(len(processed_results[index])):
processed_results[index][j] = [idx[index]] + processed_results[index][j]
dtype = np.int64 if args.dataset == 'cnn' else np.float32
response_array = array.array("B", np.array(processed_results[index], dtype).tobytes())
response_array_refs.append(response_array)
bi = response_array.buffer_info()
response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
lg.QuerySamplesComplete(response)
# for lg.ConstructSUT()
def flush_queries():
pass
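    # LoadGen contract: LoadGen invokes issue_queries() with batches of
    # QuerySample objects; the SUT answers each query with a QuerySampleResponse
    # pointing at a raw byte buffer and signals completion through
    # lg.QuerySamplesComplete(). flush_queries() is a hook for SUTs that buffer
    # work; this harness answers synchronously, so it is a no-op.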
settings = lg.TestSettings()
if args.model_name != "":
settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
settings.FromConfig(user_conf, args.model_name, args.scenario)
settings.scenario = scenario
settings.mode = lg.TestMode.PerformanceOnly
if args.accuracy:
settings.mode = lg.TestMode.AccuracyOnly
if args.find_peak_performance:
settings.mode = lg.TestMode.FindPeakPerformance
if args.time:
# override the time we want to run
settings.min_duration_ms = args.time * MILLI_SEC
settings.max_duration_ms = args.time * MILLI_SEC
if args.qps:
qps = float(args.qps)
settings.server_target_qps = qps
settings.offline_expected_qps = qps
if count_override:
settings.min_query_count = count
settings.max_query_count = count
if args.samples_per_query:
settings.multi_stream_samples_per_query = args.samples_per_query
if args.max_latency:
settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)
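    # Settings precedence: mlperf.conf supplies the rules-compliant defaults,
    # user.conf overrides LoadGen tunables such as target QPS, and the CLI
    # flags above override both (not valid for official submissions).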
# sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(count, min(count, 500), dataset.load, dataset.unload)
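    # ConstructQSL(total_count, performance_sample_count, load_cb, unload_cb);
    # min(count, 500) caps how many samples LoadGen keeps resident at once.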
log.info("starting {}".format(scenario))
log_path = os.path.realpath(args.log_dir)
log_output_settings = lg.LogOutputSettings()
log_output_settings.outdir = log_path
log_output_settings.copy_summary_to_stdout = True
log_settings = lg.LogSettings()
log_settings.log_output = log_output_settings
# log_settings.enable_trace = True
# lg.StartTest(sut, qsl, settings)
lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)
if not last_timeing:
last_timeing = result_timeing
if args.accuracy:
accuracy_script_paths = {'coco': os.path.realpath('./inference/vision/classification_and_detection/tools/accuracy-coco.py'),
'imagenet': os.path.realpath('./inference/vision/classification_and_detection/tools/accuracy-imagenet.py'),
'squad': os.path.realpath('./inference/language/bert/accuracy-squad.py'),
'brats2019': os.path.realpath('./inference/vision/medical_imaging/3d-unet/accuracy-brats.py'),
'cnn': os.path.realpath('./inference/language/gpt-j/evaluation.py')}
accuracy_script_path = accuracy_script_paths[args.dataset]
accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
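        # DATA_DIR must be set in the environment; the accuracy scripts below
        # resolve their reference files relative to it.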
data_dir = os.environ['DATA_DIR']
if args.dataset == 'coco':
if args.use_inv_map:
subprocess.check_call('python3 {} --mlperf-accuracy-file {} --coco-dir {} --use-inv-map'.format(accuracy_script_path, accuracy_file_path, data_dir), shell=True)
else:
subprocess.check_call('python3 {} --mlperf-accuracy-file {} --coco-dir {}'.format(accuracy_script_path, accuracy_file_path, data_dir), shell=True)
elif args.dataset == 'imagenet': # imagenet
subprocess.check_call('python3 {} --mlperf-accuracy-file {} --imagenet-val-file {}'.format(accuracy_script_path, accuracy_file_path, os.path.join(data_dir, 'val_map.txt')), shell=True)
elif args.dataset == 'squad': # squad
vocab_path = os.path.join(data_dir, 'vocab.txt')
val_path = os.path.join(data_dir, 'dev-v1.1.json')
out_path = os.path.join(log_dir, 'predictions.json')
cache_path = os.path.join(data_dir, 'eval_features.pickle')
subprocess.check_call('python3 {} --vocab_file {} --val_data {} --log_file {} --out_file {} --features_cache_file {} --max_examples {}'.
format(accuracy_script_path, vocab_path, val_path, accuracy_file_path, out_path, cache_path, count), shell=True)
elif args.dataset == 'brats2019': # brats2019
base_dir = os.path.realpath('./inference/vision/medical_imaging/3d-unet/build')
post_dir = os.path.join(base_dir, 'postprocessed_data')
label_dir = os.path.join(base_dir, 'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
os.makedirs(post_dir, exist_ok=True)
subprocess.check_call('python3 {} --log_file {} --preprocessed_data_dir {} --postprocessed_data_dir {} --label_data_dir {}'.
format(accuracy_script_path, accuracy_file_path, data_dir, post_dir, label_dir), shell=True)
elif args.dataset == 'cnn': # cnn
subprocess.check_call('python3 {} --mlperf-accuracy-file {} --dataset-file {}'.format(accuracy_script_path, accuracy_file_path, os.path.join(data_dir, 'cnn_eval.json')), shell=True)
        else:
            raise RuntimeError('Dataset not implemented.')
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)
if __name__ == "__main__":
main()