diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark.py b/onnxruntime/python/tools/tensorrt/perf/benchmark.py index 6d17d67f46..36bd22fb1e 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py @@ -17,20 +17,29 @@ from perf_utils import * import pprint import time from float16 import * -# import torch debug = False sys.path.append('.') logger = logging.getLogger('') -ep_to_provider_list = { - "CPUExecutionProvider": ["CPUExecutionProvider"], - "CUDAExecutionProvider": ["CUDAExecutionProvider"], - "CUDAExecutionProvider_fp16": ["CUDAExecutionProvider"], - "TensorrtExecutionProvider": ["TensorrtExecutionProvider", "CUDAExecutionProvider"], - "TensorrtExecutionProvider_fp16": ["TensorrtExecutionProvider", "CUDAExecutionProvider"], -} +# global ep variables +cpu = "CPUExecutionProvider" +acl = "ACLExecutionProvider" +cuda = "CUDAExecutionProvider" +cuda_fp16 = "CUDAExecutionProvider_fp16" +trt = "TensorrtExecutionProvider" +trt_fp16 = "TensorrtExecutionProvider_fp16" +standalone_trt = "Standalone_TRT" +standalone_trt_fp16 = "Standalone_TRT_fp16" +ep_to_provider_list = { + cpu: [cpu], + acl: [acl], + cuda: [cuda], + cuda_fp16: [cuda], + trt: [trt, cuda], + trt_fp16: [trt, cuda], +} # metadata FAIL_MODEL_FILE = ".fail_model_map" @@ -212,6 +221,20 @@ def inference_ort_and_get_prediction(name, session, ort_inputs): return ort_outputs +def get_acl_version(): + from pathlib import Path + home = str(Path.home()) + p = subprocess.run(["find", home, "-name", "libarm_compute.so"], check=True, stdout=subprocess.PIPE) + libarm_compute_path = p.stdout.decode("ascii").strip() + if libarm_compute_path == '': + return "No Compute Library Found" + else: + p = subprocess.run(["strings", libarm_compute_path], check=True, stdout=subprocess.PIPE) + libarm_so_strings = p.stdout.decode("ascii").strip() + version_match = re.search(r'arm_compute_version.*\n', libarm_so_strings) + version = version_match.group(0).split(' ')[0] + return version + def get_cuda_version(): from pathlib import Path home = str(Path.home()) @@ -396,14 +419,14 @@ def generate_onnx_model_random_input(test_times, ref_input): return inputs -def validate(all_ref_outputs, all_outputs, decimal): +def validate(all_ref_outputs, all_outputs, rtol=0, atol=1.5): if len(all_ref_outputs) == 0: logger.info("No reference output provided.") return True, None logger.info('Reference {} results.'.format(len(all_ref_outputs))) logger.info('Predicted {} results.'.format(len(all_outputs))) - logger.info('decimal {}'.format(decimal)) + logger.info('rtol: {}, atol: {}'.format(rtol, atol)) try: for i in range(len(all_outputs)): @@ -414,10 +437,10 @@ def validate(all_ref_outputs, all_outputs, decimal): ref_output = ref_outputs[j] output = outputs[j] - # Compare the results with reference outputs up to x decimal places + # Compare the results with reference outputs for ref_o, o in zip(ref_output, output): - # abs(desired-actual) < 1.5 * 10**(-decimal) - np.testing.assert_almost_equal(ref_o, o, decimal) + # abs(desired-actual) < rtol * abs(desired) + atol + np.testing.assert_allclose(ref_o, o, rtol, atol) except Exception as e: logger.error(e) return False, e @@ -483,7 +506,7 @@ def update_metrics_map(model_to_metrics, model_name, ep_to_operator): if ep not in model_to_metrics[model_name]: model_to_metrics[model_name][ep] = {} - if ep == "CUDAExecutionProvider" or ep == "CUDAExecutionProvider_fp16": + if ep == cuda or ep == cuda_fp16: model_to_metrics[model_name][ep]['ratio_of_ops_in_cuda_not_fallback_cpu'] = calculate_cuda_op_percentage(op_map) model_to_metrics[model_name][ep]['total_ops'] = get_total_ops(op_map) else: @@ -504,13 +527,13 @@ def update_metrics_map_ori(model_to_metrics, name, ep_to_operator): cuda_fp16_op_map = None for ep, op_map in ep_to_operator.items(): - if ep == "CUDAExecutionProvider": + if ep == cuda: cuda_op_map = op_map - elif ep == "CUDAExecutionProvider_fp16": + elif ep == cuda_fp16: cuda_fp16_op_map = op_map - elif ep == "TensorrtExecutionProvider": + elif ep == trt: trt_op_map = op_map - elif ep == "TensorrtExecutionProvider_fp16": + elif ep == trt_fp16: trt_fp16_op_map = op_map @@ -575,8 +598,8 @@ def update_fail_model_map(model_to_fail_ep, model_name, ep, e_type, e): model_to_fail_ep[model_name][ep] = new_map # If TRT fails, TRT FP16 should fail as well - if ep == 'TensorrtExecutionProvider': - ep_ = "TensorrtExecutionProvider_fp16" + if ep == trt: + ep_ = trt_fp16 e_ = "skip benchmarking since TRT failed already." new_map_1 = {} new_map_1["error_type"] = e_type @@ -595,8 +618,8 @@ def update_fail_model_map_ori(model_to_fail_ep, fail_results, model_name, ep, e_ update_fail_report(fail_results, model_name, ep, e_type, e) # If TRT fails, TRT FP16 should fail as well - if ep == 'TensorrtExecutionProvider': - ep_ = "TensorrtExecutionProvider_fp16" + if ep == trt: + ep_ = trt_fp16 error_message_ = "skip benchmarking since TRT failed already." update_fail_report(fail_results, model_name, ep_, e_type, error_message_) model_to_fail_ep[model_name][ep_] = e_type @@ -771,10 +794,10 @@ def parse_models_info_from_directory(path, models): parse_models_info_from_directory(os.path.join(path, dir), models) -def parse_models_info_from_file(default_dir, path, models): +def parse_models_info_from_file(root_dir, path, models): # default working directory - root_working_directory = default_dir + root_working_directory = root_dir with open(path) as f: data = json.load(f) @@ -877,11 +900,11 @@ def run_onnxruntime(args, models): ep_list.append(args.ep) else: if args.fp16: - ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider", "CUDAExecutionProvider_fp16", "TensorrtExecutionProvider_fp16"] + ep_list = [cpu, cuda, trt, cuda_fp16, trt_fp16] else: - ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider"] + ep_list = [cpu, cuda, trt] - validation_exemption = ["TensorrtExecutionProvider_fp16"] + validation_exemption = [trt_fp16] if os.path.exists(FAIL_MODEL_FILE): @@ -927,7 +950,7 @@ def run_onnxruntime(args, models): model_path = model_info["model_path"] test_data_dir = model_info["test_data_path"] - if ep == "CUDAExecutionProvider_fp16": + if ep == cuda_fp16: logger.info("[Initialize] model = {}, ep = {} ,FP16 = True ...".format(name, ep)) fp16 = True os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1" @@ -952,7 +975,7 @@ def run_onnxruntime(args, models): else: inputs, ref_outputs = get_test_data(True, test_data_dir, all_inputs_shape) - elif ep == "TensorrtExecutionProvider_fp16": + elif ep == trt_fp16: logger.info("[Initialize] model = {}, ep = {} ,FP16 = True ...".format(name, ep)) fp16 = True os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1" @@ -1020,13 +1043,13 @@ def run_onnxruntime(args, models): latency_result[ep]["latency_90_percentile"] = result["latency_90_percentile"] # get standalone TensorRT perf - if "TensorrtExecutionProvider" in ep and args.trtexec: + if trt in ep and args.trtexec: result = run_trt_standalone(args.trtexec, model_path, sess.get_inputs(), all_inputs_shape, fp16) if result and len(result) > 0: if fp16: - latency_result["Standalone_TRT_fp16"] = result + latency_result[standalone_trt_fp16] = result else: - latency_result["Standalone_TRT"] = result + latency_result[standalone_trt] = result model_to_latency[name] = copy.deepcopy(latency_result) @@ -1072,8 +1095,7 @@ def run_onnxruntime(args, models): try: ort_outputs = inference_ort_and_get_prediction(name, sess, inputs) - decimal = 0 - status = validate(ref_outputs, ort_outputs, decimal) + status = validate(ref_outputs, ort_outputs) if not status[0]: update_fail_model_map(model_to_fail_ep, name, ep, 'result accuracy issue', status[1]) continue @@ -1117,17 +1139,17 @@ def run_onnxruntime(args, models): def add_improvement_information(model_to_latency): for key, value in model_to_latency.items(): - if not ('TensorrtExecutionProvider' in value and 'CUDAExecutionProvider' in value): + if not (trt in value and cuda in value): continue - trt_latency = float(value['TensorrtExecutionProvider']['average_latency_ms']) - cuda_latency = float(value['CUDAExecutionProvider']['average_latency_ms']) + trt_latency = float(value[trt]['average_latency_ms']) + cuda_latency = float(value[cuda]['average_latency_ms']) gain = (cuda_latency - trt_latency)*100/cuda_latency value["Tensorrt_gain(%)"] = "{:.2f} %".format(gain) - if "TensorrtExecutionProvider_fp16" in value and "CUDAExecutionProvider_fp16" in value: - trt_fp16_latency = float(value['TensorrtExecutionProvider_fp16']['average_latency_ms']) - cuda_fp16_latency = float(value['CUDAExecutionProvider_fp16']['average_latency_ms']) + if trt_fp16 in value and cuda_fp16 in value: + trt_fp16_latency = float(value[trt_fp16]['average_latency_ms']) + cuda_fp16_latency = float(value[cuda_fp16]['average_latency_ms']) gain = (cuda_fp16_latency - trt_fp16_latency)*100/cuda_fp16_latency value["Tensorrt_fp16_gain(%)"] = "{:.2f} %".format(gain) @@ -1211,55 +1233,55 @@ def output_status(results, csv_filename): with open(csv_filename, mode="a", newline='') as csv_file: column_names = ["Model", - "CPU", - "CUDA fp32", - "TRT fp32", - "Standalone TRT fp32", - "CUDA fp16", - "TRT fp16", - "Standalone TRT fp16" + cpu, + cuda + " fp32", + trt + " fp32", + standalone_trt + " fp32", + cuda + " fp16", + trt + " fp16", + standalone_trt + "fp16" ] + csv_writer = csv.writer(csv_file) if need_write_header: csv_writer.writerow(column_names) - cpu = "" - cuda_fp32 = "" - trt_fp32 = "" - standalone_fp32 = "" - cuda_fp16 = "" - trt_fp16 = "" - standalone_fp16 = "" + cpu_status = "" + cuda_fp32_status = "" + trt_fp32_status = "" + standalone_fp32_status = "" + cuda_fp16_status = "" + trt_fp16_status = "" + standalone_fp16_status = "" for model_name, ep_dict in results.items(): for ep, status in ep_dict.items(): - if ep == "CPUExecutionProvider": - cpu = status - elif ep == "CUDAExecutionProvider": - cuda_fp32 = status - elif ep == "TensorrtExecutionProvider": - trt_fp32 = status - elif ep == "Standalone_TRT": - standalone_fp32 = status - elif ep == "CUDAExecutionProvider_fp16": - cuda_fp16 = status - elif ep == "TensorrtExecutionProvider_fp16": - trt_fp16 = status - elif ep == "Standalone_TRT_fp16": - standalone_fp16 = status + if ep == cpu: + cpu_status = status + elif ep == cuda: + cuda_fp32_status = status + elif ep == trt: + trt_fp32_status = status + elif ep == standalone_trt: + standalone_fp32_status = status + elif ep == cuda_fp16: + cuda_fp16_status = status + elif ep == trt_fp16: + trt_fp16_status = status + elif ep == standalone_trt_fp16: + standalone_fp16_status = status else: continue row = [model_name, cpu, - cuda_fp32, - trt_fp32, - standalone_fp32, - cuda_fp16, - trt_fp16, - standalone_fp16 - ] + cuda_fp32_status, + trt_fp32_status, + standalone_fp32_status, + cuda_fp16_status, + trt_fp16_status, + standalone_fp16_status] csv_writer.writerow(row) def output_latency(results, csv_filename): @@ -1292,61 +1314,61 @@ def output_latency(results, csv_filename): for key, value in results.items(): cpu_average = "" - if "CPUExecutionProvider" in value and "average_latency_ms" in value["CPUExecutionProvider"]: - cpu_average = value["CPUExecutionProvider"]["average_latency_ms"] + if cpu in value and "average_latency_ms" in value[cpu]: + cpu_average = value[cpu]["average_latency_ms"] cpu_90_percentile = "" - if "CPUExecutionProvider" in value and "latency_90_percentile" in value["CPUExecutionProvider"]: - cpu_90_percentile = value["CPUExecutionProvider"]["latency_90_percentile"] + if cpu in value and "latency_90_percentile" in value[cpu]: + cpu_90_percentile = value[cpu]["latency_90_percentile"] cuda_average = "" - if 'CUDAExecutionProvider' in value and 'average_latency_ms' in value['CUDAExecutionProvider']: - cuda_average = value['CUDAExecutionProvider']['average_latency_ms'] + if cuda in value and 'average_latency_ms' in value[cuda]: + cuda_average = value[cuda]['average_latency_ms'] cuda_90_percentile = "" - if 'CUDAExecutionProvider' in value and 'latency_90_percentile' in value['CUDAExecutionProvider']: - cuda_90_percentile = value['CUDAExecutionProvider']['latency_90_percentile'] + if cuda in value and 'latency_90_percentile' in value[cuda]: + cuda_90_percentile = value[cuda]['latency_90_percentile'] trt_average = "" - if 'TensorrtExecutionProvider' in value and 'average_latency_ms' in value['TensorrtExecutionProvider']: - trt_average = value['TensorrtExecutionProvider']['average_latency_ms'] + if trt in value and 'average_latency_ms' in value[trt]: + trt_average = value[trt]['average_latency_ms'] trt_90_percentile = "" - if 'TensorrtExecutionProvider' in value and 'latency_90_percentile' in value['TensorrtExecutionProvider']: - trt_90_percentile = value['TensorrtExecutionProvider']['latency_90_percentile'] + if trt in value and 'latency_90_percentile' in value[trt]: + trt_90_percentile = value[trt]['latency_90_percentile'] standalone_trt_average = "" - if 'Standalone_TRT' in value and 'average_latency_ms' in value['Standalone_TRT']: - standalone_trt_average = value['Standalone_TRT']['average_latency_ms'] + if standalone_trt in value and 'average_latency_ms' in value[standalone_trt]: + standalone_trt_average = value[standalone_trt]['average_latency_ms'] standalone_trt_90_percentile = "" - if 'Standalone_TRT' in value and 'latency_90_percentile' in value['Standalone_TRT']: - standalone_trt_90_percentile = value['Standalone_TRT']['latency_90_percentile'] + if standalone_trt in value and 'latency_90_percentile' in value[standalone_trt]: + standalone_trt_90_percentile = value[standalone_trt]['latency_90_percentile'] cuda_fp16_average = "" - if 'CUDAExecutionProvider_fp16' in value and 'average_latency_ms' in value['CUDAExecutionProvider_fp16']: - cuda_fp16_average = value['CUDAExecutionProvider_fp16']['average_latency_ms'] + if cuda_fp16 in value and 'average_latency_ms' in value[cuda_fp16]: + cuda_fp16_average = value[cuda_fp16]['average_latency_ms'] cuda_fp16_90_percentile = "" - if 'CUDAExecutionProvider_fp16' in value and 'latency_90_percentile' in value['CUDAExecutionProvider_fp16']: - cuda_fp16_90_percentile = value['CUDAExecutionProvider_fp16']['latency_90_percentile'] + if cuda_fp16 in value and 'latency_90_percentile' in value[cuda_fp16]: + cuda_fp16_90_percentile = value[cuda_fp16]['latency_90_percentile'] trt_fp16_average = "" - if 'TensorrtExecutionProvider_fp16' in value and 'average_latency_ms' in value['TensorrtExecutionProvider_fp16']: - trt_fp16_average = value['TensorrtExecutionProvider_fp16']['average_latency_ms'] + if trt_fp16 in value and 'average_latency_ms' in value[trt_fp16]: + trt_fp16_average = value[trt_fp16]['average_latency_ms'] trt_fp16_90_percentile = "" - if 'TensorrtExecutionProvider_fp16' in value and 'latency_90_percentile' in value['TensorrtExecutionProvider_fp16']: - trt_fp16_90_percentile = value['TensorrtExecutionProvider_fp16']['latency_90_percentile'] + if trt_fp16 in value and 'latency_90_percentile' in value[trt_fp16]: + trt_fp16_90_percentile = value[trt_fp16]['latency_90_percentile'] standalone_trt_fp16_average = "" - if 'Standalone_TRT_fp16' in value and 'average_latency_ms' in value['Standalone_TRT_fp16']: - standalone_trt_fp16_average = value['Standalone_TRT_fp16']['average_latency_ms'] + if standalone_trt in value and 'average_latency_ms' in value[standalone_trt_fp16]: + standalone_trt_fp16_average = value[standalone_trt]['average_latency_ms'] standalone_trt_fp16_90_percentile = "" - if 'Standalone_TRT_fp16' in value and 'latency_90_percentile' in value['Standalone_TRT_fp16']: - standalone_trt_fp16_90_percentile = value['Standalone_TRT_fp16']['latency_90_percentile'] + if standalone_trt in value and 'latency_90_percentile' in value[standalone_trt_fp16]: + standalone_trt_fp16_90_percentile = value[standalone_trt]['latency_90_percentile'] row = [key, @@ -1390,41 +1412,41 @@ def output_metrics(model_to_metrics, csv_filename): result["model_name"] = model result_fp16["model_name"] = model + " (FP16)" - if "CUDAExecutionProvider" in ep_info: - result['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info["CUDAExecutionProvider"]['ratio_of_ops_in_cuda_not_fallback_cpu'] + if cuda in ep_info: + result['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info[cuda]['ratio_of_ops_in_cuda_not_fallback_cpu'] - if "TensorrtExecutionProvider" in ep_info: - result['total_trt_execution_time'] = ep_info["TensorrtExecutionProvider"]['total_trt_execution_time'] - result['total_execution_time'] = ep_info["TensorrtExecutionProvider"]['total_execution_time'] - result['ratio_of_execution_time_in_trt'] = ep_info["TensorrtExecutionProvider"]['ratio_of_execution_time_in_trt'] + if trt in ep_info: + result['total_trt_execution_time'] = ep_info[trt]['total_trt_execution_time'] + result['total_execution_time'] = ep_info[trt]['total_execution_time'] + result['ratio_of_execution_time_in_trt'] = ep_info[trt]['ratio_of_execution_time_in_trt'] - if "CUDAExecutionProvider" in ep_info and "TensorrtExecutionProvider" in ep_info: + if cuda in ep_info and trt in ep_info: ######################################################################################## # equation of % TRT ops: # (total ops in cuda json - cuda and cpu ops in trt json)/ total ops in cuda json ######################################################################################## - total_ops_in_cuda = ep_info["CUDAExecutionProvider"]["total_ops"] - cuda_cpu_ops_in_trt = ep_info["TensorrtExecutionProvider"]["total_ops"] + total_ops_in_cuda = ep_info[cuda]["total_ops"] + cuda_cpu_ops_in_trt = ep_info[trt]["total_ops"] result['total_ops_in_trt'] = total_ops_in_cuda - cuda_cpu_ops_in_trt result['total_ops'] = total_ops_in_cuda result['ratio_of_ops_in_trt'] = (total_ops_in_cuda - cuda_cpu_ops_in_trt) / total_ops_in_cuda - if "CUDAExecutionProvider_fp16" in ep_info: - result_fp16['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info["CUDAExecutionProvider_fp16"]['ratio_of_ops_in_cuda_not_fallback_cpu'] + if cuda_fp16 in ep_info: + result_fp16['ratio_of_ops_in_cuda_not_fallback_cpu'] = ep_info[cuda_fp16]['ratio_of_ops_in_cuda_not_fallback_cpu'] - if "TensorrtExecutionProvider_fp16" in ep_info: - result_fp16['total_trt_execution_time'] = ep_info["TensorrtExecutionProvider_fp16"]['total_trt_execution_time'] - result_fp16['total_execution_time'] = ep_info["TensorrtExecutionProvider_fp16"]['total_execution_time'] - result_fp16['ratio_of_execution_time_in_trt'] = ep_info["TensorrtExecutionProvider_fp16"]['ratio_of_execution_time_in_trt'] + if trt_fp16 in ep_info: + result_fp16['total_trt_execution_time'] = ep_info[trt_fp16]['total_trt_execution_time'] + result_fp16['total_execution_time'] = ep_info[trt_fp16]['total_execution_time'] + result_fp16['ratio_of_execution_time_in_trt'] = ep_info[trt_fp16]['ratio_of_execution_time_in_trt'] - if "CUDAExecutionProvider_fp16" in ep_info and "TensorrtExecutionProvider_fp16" in ep_info: + if cuda_fp16 in ep_info and trt_fp16 in ep_info: ######################################################################################## # equation of % TRT ops: # (total ops in cuda json - cuda and cpu ops in trt json)/ total ops in cuda json ######################################################################################## - total_ops_in_cuda = ep_info["CUDAExecutionProvider_fp16"]["total_ops"] - cuda_cpu_ops_in_trt = ep_info["TensorrtExecutionProvider_fp16"]["total_ops"] + total_ops_in_cuda = ep_info[cuda_fp16]["total_ops"] + cuda_cpu_ops_in_trt = ep_info[trt_fp16]["total_ops"] result_fp16['total_ops_in_trt'] = total_ops_in_cuda - cuda_cpu_ops_in_trt result_fp16['total_ops'] = total_ops_in_cuda @@ -1474,8 +1496,10 @@ def str2bool(v): def parse_arguments(): parser = argparse.ArgumentParser() + + parser.add_argument("-c", "--comparison", required=False, default="cuda_trt", choices=["cuda_trt", "acl"], help="EPs to compare: CPU vs. CUDA vs. TRT or CPU vs. ACL") - parser.add_argument("-d", "--default_dir", required=False, default="~/", help="Perf folder path") + parser.add_argument("-d", "--working_dir", required=False, default="./", help="Perf folder path") parser.add_argument("-m", "--model_source", required=False, default="model_list.json", help="Model source: (1) model list file (2) model directory.") @@ -1518,7 +1542,7 @@ def setup_logger(verbose): def parse_models_helper(args, models): if ".json" in args.model_source: logger.info("Parsing model information from file ...") - parse_models_info_from_file(args.default_dir, args.model_source, models) + parse_models_info_from_file(args.working_dir, args.model_source, models) else: logger.info("Parsing model information from directory ...") parse_models_info_from_directory(args.model_source, models) diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py index 03a8b573b4..c69dee72bf 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py @@ -15,6 +15,14 @@ def write_model_info_to_file(model, path): with open(path, 'w') as file: file.write(json.dumps(model)) # use `json.loads` to do the reverse +def get_ep_list(comparison): + if comparison == 'acl': + ep_list = [cpu, acl] + else: + # test with cuda and trt + ep_list = [cpu, cuda, trt, cuda_fp16, trt_fp16] + return ep_list + def main(): args = parse_arguments() setup_logger(False) @@ -41,13 +49,12 @@ def main(): model_list_file = os.path.join(os.getcwd(), model +'.json') write_model_info_to_file([model_info], model_list_file) - ep_list = ["CPUExecutionProvider", "CUDAExecutionProvider", "TensorrtExecutionProvider", "CUDAExecutionProvider_fp16", "TensorrtExecutionProvider_fp16"] + ep_list = get_ep_list(args.comparison) for ep in ep_list: if args.running_mode == "validate": p = subprocess.run(["python3", "benchmark.py", - "-d", args.default_dir, "-r", args.running_mode, "-m", model_list_file, "--ep", ep, @@ -58,7 +65,6 @@ def main(): elif args.running_mode == "benchmark": p = subprocess.run(["python3", "benchmark.py", - "-d", args.default_dir, "-r", args.running_mode, "-m", model_list_file, "--ep", ep, diff --git a/onnxruntime/python/tools/tensorrt/perf/perf.sh b/onnxruntime/python/tools/tensorrt/perf/perf.sh index bc52eae561..abc19c9752 100755 --- a/onnxruntime/python/tools/tensorrt/perf/perf.sh +++ b/onnxruntime/python/tools/tensorrt/perf/perf.sh @@ -11,9 +11,6 @@ SYMBOLIC_SHAPE_INFER_LINK="https://raw.githubusercontent.com/microsoft/onnxrunti FLOAT_16="float16.py" FLOAT_16_LINK="https://raw.githubusercontent.com/microsoft/onnxconverter-common/master/onnxconverter_common/float16.py" -# root working directory -DEFAULT_DIR="./" - cleanup_files() { rm -f $FAIL_MODEL_FILE rm -f $LATENCY_FILE @@ -36,8 +33,8 @@ update_files() { if [ "$1" == "many-models" ] then update_files - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m /home/hcsuser/mount/many-models -o result/"$1" - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m /home/hcsuser/mount/many-models -o result/"$1" + python3 benchmark_wrapper.py -r validate -m /home/hcsuser/mount/many-models -o result/"$1" + python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m /home/hcsuser/mount/many-models -o result/"$1" fi # ONNX model zoo @@ -45,8 +42,8 @@ if [ "$1" == "onnx-zoo-models" ] then MODEL_LIST="model_list.json" update_files - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1" - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1" fi # 1P models @@ -54,8 +51,8 @@ if [ "$1" == "partner-models" ] then MODEL_LIST="partner_model_list.json" update_files - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1" - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r benchmark -i random -t 10 -m $MODEL_LIST -o result/"$1" fi # Test models @@ -63,6 +60,6 @@ if [ "$1" == "selected-models" ] then MODEL_LIST="selected_models.json" update_files - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r validate -m $MODEL_LIST -o result/"$1" - python3 benchmark_wrapper.py -d $DEFAULT_DIR -r benchmark -i random -t 1 -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r validate -m $MODEL_LIST -o result/"$1" + python3 benchmark_wrapper.py -r benchmark -i random -t 1 -m $MODEL_LIST -o result/"$1" fi diff --git a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py index dcf1958705..2767e8e2a9 100644 --- a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py +++ b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py @@ -8,6 +8,12 @@ import re debug = False debug_verbose = False +def find(regex_string): + import glob + results = glob.glob(regex_string) + results.sort() + return results + def get_latest_commit_hash(): p1 = subprocess.Popen(["git", "rev-parse", "--short", "HEAD"], stdout = subprocess.PIPE) stdout, sterr = p1.communicate()