Refactor EP Perf Tool (#6202)

* merge master, keep postprocess status commit

* download float16.py every time

* using variables to reference eps

* adding ACL EP to ep perf tool

* accuracy with absolute tolerance configurable

* add acl to dict + remove commented line
This commit is contained in:
Olivia Jain 2021-01-04 08:50:41 -08:00 committed by GitHub
parent 46e0e4e69f
commit c8de3f355a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 170 additions and 137 deletions

View file

@ -17,20 +17,29 @@ from perf_utils import *
import pprint
import time
from float16 import *
# import torch
debug = False
sys.path.append('.')
logger = logging.getLogger('')
ep_to_provider_list = {
"CPUExecutionProvider": ["CPUExecutionProvider"],
"CUDAExecutionProvider": ["CUDAExecutionProvider"],
"CUDAExecutionProvider_fp16": ["CUDAExecutionProvider"],
"TensorrtExecutionProvider": ["TensorrtExecutionProvider", "CUDAExecutionProvider"],
"TensorrtExecutionProvider_fp16": ["TensorrtExecutionProvider", "CUDAExecutionProvider"],
}
# global ep variables
cpu = "CPUExecutionProvider"
acl = "ACLExecutionProvider"
cuda = "CUDAExecutionProvider"
cuda_fp16 = "CUDAExecutionProvider_fp16"
trt = "TensorrtExecutionProvider"
trt_fp16 = "TensorrtExecutionProvider_fp16"
standalone_trt = "Standalone_TRT"
standalone_trt_fp16 = "Standalone_TRT_fp16"
ep_to_provider_list = {
cpu: [cpu],
acl: [acl],
cuda: [cuda],
cuda_fp16: [cuda],
trt: [trt, cuda],
trt_fp16: [trt, cuda],
}
# metadata
FAIL_MODEL_FILE = ".fail_model_map"
@ -212,6 +221,20 @@ def inference_ort_and_get_prediction(name, session, ort_inputs):
return ort_outputs
def get_acl_version():
    """Return the Arm Compute Library version token found under the home directory.

    Runs ``find`` over the current user's home directory looking for
    ``libarm_compute.so`` and, when a library is found, runs ``strings`` on it
    and extracts the line starting at ``arm_compute_version``.

    Returns:
        The first space-delimited token of the ``arm_compute_version`` line,
        ``"No Compute Library Found"`` when ``find`` reports no library, or
        ``"Unknown Compute Library Version"`` when the library contains no
        ``arm_compute_version`` string.

    Raises:
        subprocess.CalledProcessError: if ``find`` or ``strings`` exits with a
            non-zero status.
    """
    from pathlib import Path

    home = str(Path.home())
    p = subprocess.run(["find", home, "-name", "libarm_compute.so"], check=True, stdout=subprocess.PIPE)
    # find prints one path per line; several copies of the library may exist,
    # so split the output instead of treating it as a single path.
    library_paths = p.stdout.decode("ascii").strip().splitlines()
    if not library_paths:
        return "No Compute Library Found"

    p = subprocess.run(["strings", library_paths[0]], check=True, stdout=subprocess.PIPE)
    libarm_so_strings = p.stdout.decode("ascii").strip()
    # '.' never matches a newline, so '.*' stops at the end of the version line;
    # not requiring a trailing '\n' keeps the match working when that line is
    # the last one (the strip() above removes the final newline).
    version_match = re.search(r'arm_compute_version.*', libarm_so_strings)
    if version_match is None:
        return "Unknown Compute Library Version"
    return version_match.group(0).split(' ')[0]
def get_cuda_version():
from pathlib import Path
home = str(Path.home())
@ -396,14 +419,14 @@ def generate_onnx_model_random_input(test_times, ref_input):
return inputs
def validate(all_ref_outputs, all_outputs, decimal):
def validate(all_ref_outputs, all_outputs, rtol=0, atol=1.5):
if len(all_ref_outputs) == 0:
logger.info("No reference output provided.")
return True, None
logger.info('Reference {} results.'.format(len(all_ref_outputs)))
logger.info('Predicted {} results.'.format(len(all_outputs)))
logger.info('decimal {}'.format(decimal))
logger.info('rtol: {}, atol: {}'.format(rtol, atol))
try:
for i in range(len(all_outputs)):
@ -414,10 +437,10 @@ def validate(all_ref_outputs, all_outputs, decimal):
ref_output = ref_outputs[j]
output = outputs[j]
# Compare the results with reference outputs up to x decimal places
# Compare the results with reference outputs
for ref_o, o in zip(ref_output, output):
# abs(desired-actual) < 1.5 * 10**(-decimal)
np.testing.assert_almost_equal(ref_o, o, decimal)
# abs(desired-actual) < rtol * abs(desired) + atol
np.testing.assert_allclose(ref_o, o, rtol, atol)
except Exception as e:
logger.error(e)
return False, e
@ -483,7 +506,7 @@ def update_metrics_map(model_to_metrics, model_name, ep_to_operator):
if ep not in model_to_metrics[model_name]:
model_to_metrics[model_name][ep] = {}
if ep == "CUDAExecutionProvider" or ep == "CUDAExecutionProvider_fp16":
if ep == cuda or ep == cuda_fp16:
model_to_metrics[model_name][ep]['ratio_of_ops_in_cuda_not_fallback_cpu'] = calculate_cuda_op_percentage(op_map)
model_to_metrics[model_name][ep]['total_ops'] = get_total_ops(op_map)
else:
@ -504,13 +527,13 @@ def update_metrics_map_ori(model_to_metrics, name, ep_to_operator):
cuda_fp16_op_map = None
for ep, op_map in ep_to_operator.items():
if ep == "CUDAExecutionProvider":
if ep == cuda:
cuda_op_map = op_map
elif ep == "CUDAExecutionProvider_fp16":
elif ep == cuda_fp16:
cuda_fp16_op_map = op_map
elif ep == "TensorrtExecutionProvider":
elif ep == trt:
trt_op_map = op_map
elif ep == "TensorrtExecutionProvider_fp16":
elif ep == trt_fp16:
trt_fp16_op_map = op_map
@ -575,8 +598,8 @@ def update_fail_model_map(model_to_fail_ep, model_name, ep, e_type, e):
model_to_fail_ep[model_name][ep] = new_map
# If TRT fails, TRT FP16 should fail as well
if ep == 'TensorrtExecutionProvider':
ep_ = "TensorrtExecutionProvider_fp16"
if ep == trt:
ep_ = trt_fp16
e_ = "skip benchmarking since TRT failed already."
new_map_1 = {}
new_map_1["error_type"] = e_type
@ -595,8 +618,8 @@ def update_fail_model_map_ori(model_to_fail_ep, fail_results, model_name, ep, e_
update_fail_report(fail_results, model_name, ep, e_type, e)
# If TRT fails, TRT FP16 should fail as well
if ep == 'TensorrtExecutionProvider':
ep_ = "TensorrtExecutionProvider_fp16"
if ep == trt:
ep_ = trt_fp16
error_message_ = "skip benchmarking since TRT failed already."
update_fail_report(fail_results, model_name, ep_, e_type, error_message_)
model_to_fail_ep[model_name][ep_] = e_type
@ -771,10 +794,10 @@ def parse_models_info_from_directory(path, models):
parse_models_info_from_directory(os.path.join(path, dir), models)
def parse_models_info_from_file(default_dir, path, models):
def parse_models_info_from_file(root_dir, path, models):
# default working directory
root_working_directory = default_dir
root_working_directory = root_dir
with open(path) as f:
data = json.load(f)
@ -877,11 +900,11 @@ def run_onnxruntime(args, models):
ep_list.append(args.ep)
else:
if args.fp16:
ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider", "CUDAExecutionProvider_fp16", "TensorrtExecutionProvider_fp16"]
ep_list = [cpu, cuda, trt, cuda_fp16, trt_fp16]
else:
ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider"]
ep_list = [cpu, cuda, trt]
validation_exemption = ["TensorrtExecutionProvider_fp16"]
validation_exemption = [trt_fp16]
if os.path.exists(FAIL_MODEL_FILE):
@ -927,7 +950,7 @@ def run_onnxruntime(args, models):
model_path = model_info["model_path"]
test_data_dir = model_info["test_data_path"]
if ep == "CUDAExecutionProvider_fp16":
if ep == cuda_fp16:
logger.info("[Initialize] model = {}, ep = {} ,FP16 = True ...".format(name, ep))
fp16 = True
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1"
@ -952,7 +975,7 @@ def run_onnxruntime(args, models):
else:
inputs, ref_outputs = get_test_data(True, test_data_dir, all_inputs_shape)
elif ep == "TensorrtExecutionProvider_fp16":
elif ep == trt_fp16:
logger.info("[Initialize] model = {}, ep = {} ,FP16 = True ...".format(name, ep))
fp16 = True
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1"
@ -1020,13 +1043,13 @@ def run_onnxruntime(args, models):
latency_result[ep]["latency_90_percentile"] = result["latency_90_percentile"]
# get standalone TensorRT perf
if "TensorrtExecutionProvider" in ep and args.trtexec:
if trt in ep and args.trtexec:
result = run_trt_standalone(args.trtexec, model_path, sess.get_inputs(), all_inputs_shape, fp16)
if result and len(result) > 0:
if fp16:
latency_result["Standalone_TRT_fp16"] = result
latency_result[standalone_trt_fp16] = result
else:
latency_result["Standalone_TRT"] = result
latency_result[standalone_trt] = result
model_to_latency[name] = copy.deepcopy(latency_result)
@ -1072,8 +1095,7 @@ def run_onnxruntime(args, models):
try:
ort_outputs = inference_ort_and_get_prediction(name, sess, inputs)
decimal = 0
status = validate(ref_outputs, ort_outputs, decimal)
status = validate(ref_outputs, ort_outputs)
if not status[0]:
update_fail_model_map(model_to_fail_ep, name, ep, 'result accuracy issue', status[1])
continue
@ -1117,17 +1139,17 @@ def run_onnxruntime(args, models):
def add_improvement_information(model_to_latency):
for key, value in model_to_latency.items():
if not ('TensorrtExecutionProvider' in value and 'CUDAExecutionProvider' in value):
if not (trt in value and cuda in value):
continue
trt_latency = float(value['TensorrtExecutionProvider']['average_latency_ms'])
cuda_latency = float(value['CUDAExecutionProvider']['average_latency_ms'])
trt_latency = float(value[trt]['average_latency_ms'])
cuda_latency = float(value[cuda]['average_latency_ms'])
gain = (cuda_latency - trt_latency)*100/cuda_latency
value["Tensorrt_gain(%)"] = "{:.2f} %".format(gain)
if "TensorrtExecutionProvider_fp16" in value and "CUDAExecutionProvider_fp16" in value:
trt_fp16_latency = float(value['TensorrtExecutionProvider_fp16']['average_latency_ms'])
cuda_fp16_latency = float(value['CUDAExecutionProvider_fp16']['average_latency_ms'])
if trt_fp16 in value and cuda_fp16 in value:
trt_fp16_latency = float(value[trt_fp16]['average_latency_ms'])
cuda_fp16_latency = float(value[cuda_fp16]['average_latency_ms'])
gain = (cuda_fp16_latency - trt_fp16_latency)*100/cuda_fp16_latency
value["Tensorrt_fp16_gain(%)"] = "{:.2f} %".format(gain)
@ -1211,55 +1233,55 @@ def output_status(results, csv_filename):
with open(csv_filename, mode="a", newline='') as csv_file:
column_names = ["Model",
"CPU",
"CUDA fp32",
"TRT fp32",
"Standalone TRT fp32",
"CUDA fp16",
"TRT fp16",
"Standalone TRT fp16"
cpu,
cuda + " fp32",
trt + " fp32",
standalone_trt + " fp32",
cuda + " fp16",
trt + " fp16",
standalone_trt + "fp16"
]
csv_writer = csv.writer(csv_file)
if need_write_header:
csv_writer.writerow(column_names)
cpu = ""
cuda_fp32 = ""
trt_fp32 = ""
standalone_fp32 = ""
cuda_fp16 = ""
trt_fp16 = ""
standalone_fp16 = ""
cpu_status = ""
cuda_fp32_status = ""
trt_fp32_status = ""
standalone_fp32_status = ""
cuda_fp16_status = ""
trt_fp16_status = ""
standalone_fp16_status = ""
for model_name, ep_dict in results.items():
for ep, status in ep_dict.items():
if ep == "CPUExecutionProvider":
cpu = status
elif ep == "CUDAExecutionProvider":
cuda_fp32 = status
elif ep == "TensorrtExecutionProvider":
trt_fp32 = status
elif ep == "Standalone_TRT":
standalone_fp32 = status
elif ep == "CUDAExecutionProvider_fp16":
cuda_fp16 = status
elif ep == "TensorrtExecutionProvider_fp16":
trt_fp16 = status
elif ep == "Standalone_TRT_fp16":
standalone_fp16 = status
if ep == cpu:
cpu_status = status
elif ep == cuda:
cuda_fp32_status = status
elif ep == trt:
trt_fp32_status = status
elif ep == standalone_trt:
standalone_fp32_status = status
elif ep == cuda_fp16:
cuda_fp16_status = status
elif ep == trt_fp16:
trt_fp16_status = status
elif ep == standalone_trt_fp16:
standalone_fp16_status = status
else:
continue
row = [model_name,
cpu,
cuda_fp32,
trt_fp32,
standalone_fp32,
cuda_fp16,
trt_fp16,
standalone_fp16
]
cuda_fp32_status,
trt_fp32_status,
standalone_fp32_status,
cuda_fp16_status,
trt_fp16_status,
standalone_fp16_status]
csv_writer.writerow(row)
def output_latency(results, csv_filename):
@ -1292,61 +1314,61 @@ def output_latency(results, csv_filename):
for key, value in results.items():
cpu_average = ""
if "CPUExecutionProvider" in value and "average_latency_ms" in value["CPUExecutionProvider"]:
cpu_average = value["CPUExecutionProvider"]["average_latency_ms"]
if cpu in value and "average_latency_ms" in value[cpu]:
cpu_average = value[cpu]["average_latency_ms"]
cpu_90_percentile = ""
if "CPUExecutionProvider" in value and "latency_90_percentile" in value["CPUExecutionProvider"]:
cpu_90_percentile = value["CPUExecutionProvider"]["latency_90_percentile"]
if cpu in value and "latency_90_percentile" in value[cpu]:
cpu_90_percentile = value[cpu]["latency_90_percentile"]
cuda_average = ""
if 'CUDAExecutionProvider' in value and 'average_latency_ms' in value['CUDAExecutionProvider']:
cuda_average = value['CUDAExecutionProvider']['average_latency_ms']
if cuda in value and 'average_latency_ms' in value[cuda]:
cuda_average = value[cuda]['average_latency_ms']
cuda_90_percentile = ""
if 'CUDAExecutionProvider' in value and 'latency_90_percentile' in value['CUDAExecutionProvider']:
cuda_90_percentile = value['CUDAExecutionProvider']['latency_90_percentile']
if cuda in value and 'latency_90_percentile' in value[cuda]:
cuda_90_percentile = value[cuda]['latency_90_percentile']
trt_average = ""
if 'TensorrtExecutionProvider' in value and 'average_latency_ms' in value['TensorrtExecutionProvider']:
trt_average = value['TensorrtExecutionProvider']['average_latency_ms']
if trt in value and 'average_latency_ms' in value[trt]:
trt_average = value[trt]['average_latency_ms']
trt_90_percentile = ""
if 'TensorrtExecutionProvider' in value and 'latency_90_percentile' in value['TensorrtExecutionProvider']:
trt_90_percentile = value['TensorrtExecutionProvider']['latency_90_percentile']
if trt in value and 'latency_90_percentile' in value[trt]:
trt_90_percentile = value[trt]['latency_90_percentile']
standalone_trt_average = ""
if 'Standalone_TRT' in value and 'average_latency_ms' in value['Standalone_TRT']:
standalone_trt_average = value['Standalone_TRT']['average_latency_ms']
if standalone_trt in value and 'average_latency_ms' in value[standalone_trt]:
standalone_trt_average = value[standalone_trt]['average_latency_ms']
standalone_trt_90_percentile = ""
if 'Standalone_TRT' in value and 'latency_90_percentile' in value['Standalone_TRT']:
standalone_trt_90_percentile = value['Standalone_TRT']['latency_90_percentile']
if standalone_trt in value and 'latency_90_percentile' in value[standalone_trt]:
standalone_trt_90_percentile = value[standalone_trt]['latency_90_percentile']
cuda_fp16_average = ""
if 'CUDAExecutionProvider_fp16' in value and 'average_latency_ms' in value['CUDAExecutionProvider_fp16']:
cuda_fp16_average = value['CUDAExecutionProvider_fp16']['average_latency_ms']
if cuda_fp16 in value and 'average_latency_ms' in value[cuda_fp16]:
cuda_fp16_average = value[cuda_fp16]['average_latency_ms']
cuda_fp16_90_percentile = ""
if 'CUDAExecutionProvider_fp16' in value and 'latency_90_percentile' in value['CUDAExecutionProvider_fp16']:
cuda_fp16_90_percentile = value['CUDAExecutionProvider_fp16']['latency_90_percentile']
if cuda_fp16 in value and 'latency_90_percentile' in value[cuda_fp16]:
cuda_fp16_90_percentile = value[cuda_fp16]['latency_90_percentile']
trt_fp16_average = ""
if 'TensorrtExecutionProvider_fp16' in value and 'average_latency_ms' in value['TensorrtExecutionProvider_fp16']:
trt_fp16_average = value['TensorrtExecutionProvider_fp16']['average_latency_ms']
if trt_fp16 in value and 'average_latency_ms' in value[trt_fp16]:
trt_fp16_average = value[trt_fp16]['average_latency_ms']
trt_fp16_90_percentile = ""
if 'TensorrtExecutionProvider_fp16' in value and 'latency_90_percentile' in value['TensorrtExecutionProvider_fp16']:
trt_fp16_90_percentile = value['TensorrtExecutionProvider_fp16']['latency_90_percentile']
if trt_fp16 in value and 'latency_90_percentile' in value[trt_fp16]:
trt_fp16_90_percentile = value[trt_fp16]['latency_90_percentile']
standalone_trt_fp16_average = ""
if 'Standalone_TRT_fp16' in value and 'average_latency_ms' in value['Standalone_TRT_fp16']:
standalone_trt_fp16_average = value['Standalone_TRT_fp16']['average_latency_ms']
# Standalone TRT fp16 results are stored under their own key: test for that
# key and read from it, not from the fp32 standalone entry.
if standalone_trt_fp16 in value and 'average_latency_ms' in value[standalone_trt_fp16]:
    standalone_trt_fp16_average = value[standalone_trt_fp16]['average_latency_ms']
standalone_trt_fp16_90_percentile = ""
if 'Standalone_TRT_fp16' in value and 'latency_90_percentile' in value['Standalone_TRT_fp16']:
standalone_trt_fp16_90_percentile = value['Standalone_TRT_fp16']['latency_90_percentile']
# Standalone TRT fp16 results are stored under their own key: test for that
# key and read from it, not from the fp32 standalone entry.
if standalone_trt_fp16 in value and 'latency_90_percentile' in value[standalone_trt_fp16]:
    standalone_trt_fp16_90_percentile = value[standalone_trt_fp16]['latency_90_percentile']
row = [key,
@ -1390,41 +1412,41 @@ def output_metrics(model_to_metrics, csv_filename):
result["model_name"] = model
result_fp16["model_name"] = model + " (FP16)"
if "CUDAExecutionProvider" in ep_info:
result['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info["CUDAExecutionProvider"]['ratio_of_ops_in_cuda_not_fallback_cpu']
if cuda in ep_info:
result['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info[cuda]['ratio_of_ops_in_cuda_not_fallback_cpu']
if "TensorrtExecutionProvider" in ep_info:
result['total_trt_execution_time'] = ep_info["TensorrtExecutionProvider"]['total_trt_execution_time']
result['total_execution_time'] = ep_info["TensorrtExecutionProvider"]['total_execution_time']
result['ratio_of_execution_time_in_trt'] = ep_info["TensorrtExecutionProvider"]['ratio_of_execution_time_in_trt']
if trt in ep_info:
result['total_trt_execution_time'] = ep_info[trt]['total_trt_execution_time']
result['total_execution_time'] = ep_info[trt]['total_execution_time']
result['ratio_of_execution_time_in_trt'] = ep_info[trt]['ratio_of_execution_time_in_trt']
if "CUDAExecutionProvider" in ep_info and "TensorrtExecutionProvider" in ep_info:
if cuda in ep_info and trt in ep_info:
########################################################################################
# equation of % TRT ops:
# (total ops in cuda json - cuda and cpu ops in trt json)/ total ops in cuda json
########################################################################################
total_ops_in_cuda = ep_info["CUDAExecutionProvider"]["total_ops"]
cuda_cpu_ops_in_trt = ep_info["TensorrtExecutionProvider"]["total_ops"]
total_ops_in_cuda = ep_info[cuda]["total_ops"]
cuda_cpu_ops_in_trt = ep_info[trt]["total_ops"]
result['total_ops_in_trt'] = total_ops_in_cuda - cuda_cpu_ops_in_trt
result['total_ops'] = total_ops_in_cuda
result['ratio_of_ops_in_trt'] = (total_ops_in_cuda - cuda_cpu_ops_in_trt) / total_ops_in_cuda
if "CUDAExecutionProvider_fp16" in ep_info:
result_fp16['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info["CUDAExecutionProvider_fp16"]['ratio_of_ops_in_cuda_not_fallback_cpu']
if cuda_fp16 in ep_info:
result_fp16['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info[cuda_fp16]['ratio_of_ops_in_cuda_not_fallback_cpu']
if "TensorrtExecutionProvider_fp16" in ep_info:
result_fp16['total_trt_execution_time'] = ep_info["TensorrtExecutionProvider_fp16"]['total_trt_execution_time']
result_fp16['total_execution_time'] = ep_info["TensorrtExecutionProvider_fp16"]['total_execution_time']
result_fp16['ratio_of_execution_time_in_trt'] = ep_info["TensorrtExecutionProvider_fp16"]['ratio_of_execution_time_in_trt']
if trt_fp16 in ep_info:
result_fp16['total_trt_execution_time'] = ep_info[trt_fp16]['total_trt_execution_time']
result_fp16['total_execution_time'] = ep_info[trt_fp16]['total_execution_time']
result_fp16['ratio_of_execution_time_in_trt'] = ep_info[trt_fp16]['ratio_of_execution_time_in_trt']
if "CUDAExecutionProvider_fp16" in ep_info and "TensorrtExecutionProvider_fp16" in ep_info:
if cuda_fp16 in ep_info and trt_fp16 in ep_info:
########################################################################################
# equation of % TRT ops:
# (total ops in cuda json - cuda and cpu ops in trt json)/ total ops in cuda json
########################################################################################
total_ops_in_cuda = ep_info["CUDAExecutionProvider_fp16"]["total_ops"]
cuda_cpu_ops_in_trt = ep_info["TensorrtExecutionProvider_fp16"]["total_ops"]
total_ops_in_cuda = ep_info[cuda_fp16]["total_ops"]
cuda_cpu_ops_in_trt = ep_info[trt_fp16]["total_ops"]
result_fp16['total_ops_in_trt'] = total_ops_in_cuda - cuda_cpu_ops_in_trt
result_fp16['total_ops'] = total_ops_in_cuda
@ -1474,8 +1496,10 @@ def str2bool(v):
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--comparison", required=False, default="cuda_trt", choices=["cuda_trt", "acl"], help="EPs to compare: CPU vs. CUDA vs. TRT or CPU vs. ACL")
parser.add_argument("-d", "--default_dir", required=False, default="~/", help="Perf folder path")
parser.add_argument("-d", "--working_dir", required=False, default="./", help="Perf folder path")
parser.add_argument("-m", "--model_source", required=False, default="model_list.json", help="Model source: (1) model list file (2) model directory.")
@ -1518,7 +1542,7 @@ def setup_logger(verbose):
def parse_models_helper(args, models):
if ".json" in args.model_source:
logger.info("Parsing model information from file ...")
parse_models_info_from_file(args.default_dir, args.model_source, models)
parse_models_info_from_file(args.working_dir, args.model_source, models)
else:
logger.info("Parsing model information from directory ...")
parse_models_info_from_directory(args.model_source, models)

View file

@ -15,6 +15,14 @@ def write_model_info_to_file(model, path):
with open(path, 'w') as file:
file.write(json.dumps(model)) # use `json.loads` to do the reverse
def get_ep_list(comparison):
    """Select the execution providers to run for the requested comparison.

    ``'acl'`` yields the CPU and ACL providers; any other value yields the
    CPU, CUDA and TensorRT providers together with the fp16 variants.
    """
    if comparison == 'acl':
        return [cpu, acl]
    # test with cuda and trt
    return [cpu, cuda, trt, cuda_fp16, trt_fp16]
def main():
args = parse_arguments()
setup_logger(False)
@ -41,13 +49,12 @@ def main():
model_list_file = os.path.join(os.getcwd(), model +'.json')
write_model_info_to_file([model_info], model_list_file)
ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider", "CUDAExecutionProvider_fp16", "TensorrtExecutionProvider_fp16"]
ep_list = get_ep_list(args.comparison)
for ep in ep_list:
if args.running_mode == "validate":
p = subprocess.run(["python3",
"benchmark.py",
"-d", args.default_dir,
"-r", args.running_mode,
"-m", model_list_file,
"--ep", ep,
@ -58,7 +65,6 @@ def main():
elif args.running_mode == "benchmark":
p = subprocess.run(["python3",
"benchmark.py",
"-d", args.default_dir,
"-r", args.running_mode,
"-m", model_list_file,
"--ep", ep,

View file

@ -11,9 +11,6 @@ SYMBOLIC_SHAPE_INFER_LINK="https://raw.githubusercontent.com/microsoft/onnxrunti
FLOAT_16="float16.py"
FLOAT_16_LINK="https://raw.githubusercontent.com/microsoft/onnxconverter-common/master/onnxconverter_common/float16.py"
# root working directory
DEFAULT_DIR="./"
cleanup_files() {
rm -f $FAIL_MODEL_FILE
rm -f $LATENCY_FILE
@ -36,8 +33,8 @@ update_files() {
if [ "$1" == "many-models" ]
then
update_files
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m /home/hcsuser/mount/many-models -o result/"$1"
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m /home/hcsuser/mount/many-models -o result/"$1"
python3 benchmark_wrapper.py -r validate -m /home/hcsuser/mount/many-models -o result/"$1"
python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m /home/hcsuser/mount/many-models -o result/"$1"
fi
# ONNX model zoo
@ -45,8 +42,8 @@ if [ "$1" == "onnx-zoo-models" ]
then
MODEL_LIST="model_list.json"
update_files
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1"
fi
# 1P models
@ -54,8 +51,8 @@ if [ "$1" == "partner-models" ]
then
MODEL_LIST="partner_model_list.json"
update_files
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1"
fi
# Test models
@ -63,6 +60,6 @@ if [ "$1" == "selected-models" ]
then
MODEL_LIST="selected_models.json"
update_files
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 1 -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1"
python3 benchmark_wrapper.py -r benchmark -i random -t 1 -m $MODEL_LIST -o result/"$1"
fi

View file

@ -8,6 +8,12 @@ import re
debug = False
debug_verbose = False
def find(regex_string):
    """Return the paths matching a pattern, sorted in ascending order.

    Despite its name, ``regex_string`` is handed to ``glob.glob`` and is
    therefore interpreted as a shell-style wildcard pattern, not a regular
    expression. An empty list is returned when nothing matches.
    """
    from glob import glob

    return sorted(glob(regex_string))
def get_latest_commit_hash():
p1 = subprocess.Popen(["git", "rev-parse", "--short", "HEAD"], stdout = subprocess.PIPE)
stdout, sterr = p1.communicate()