mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
Custom parameters (#10964)
* get inputs independently for trtexec * track one process only * remove engine and profile files * change time to commit time * add runtime option for io binding * move to commit date * fixes * add option for graph optimization * cleanup docker script * note second time creation * allow for parameters to be configured from pipeline at runtime * uncomment * include optional arguments at runtime * post second session creation * update cmake version * Revert "update cmake version" This reverts commit 09a1364eae68610724c8e90eeea777b7ee03f74b. * Move data format import
This commit is contained in:
parent
9a3be9b46a
commit
de384805cd
7 changed files with 71 additions and 36 deletions
|
|
@ -939,6 +939,9 @@ def run_onnxruntime(args, models):
|
|||
model_to_fail_ep = {} # model -> failing ep
|
||||
model_to_session = {} # models -> session creation time
|
||||
|
||||
if args.running_mode == "benchmark":
|
||||
model_to_session = read_map_from_file(SESSION_FILE)
|
||||
|
||||
ep_list = []
|
||||
if args.ep:
|
||||
ep_list.append(args.ep)
|
||||
|
|
@ -995,9 +998,9 @@ def run_onnxruntime(args, models):
|
|||
# Set environment variables for ort-trt benchmarking
|
||||
if "ORT-TRT" in ep:
|
||||
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1" if "Fp16" in ep else "0"
|
||||
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1"
|
||||
os.environ["ORT_TENSORRT_MAX_WORKSPACE_SIZE"] = "4294967296"
|
||||
|
||||
if args.enable_cache:
|
||||
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1"
|
||||
fp16 = False
|
||||
|
||||
# use float16.py for cuda fp16 only
|
||||
|
|
@ -1054,6 +1057,7 @@ def run_onnxruntime(args, models):
|
|||
# resolve providers to create session
|
||||
providers = ep_to_provider_list[ep]
|
||||
options = onnxruntime.SessionOptions()
|
||||
|
||||
enablement = args.graph_enablement
|
||||
if enablement == enable_all:
|
||||
options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
|
|
@ -1066,12 +1070,15 @@ def run_onnxruntime(args, models):
|
|||
|
||||
# create onnxruntime inference session
|
||||
try:
|
||||
sess, _ = create_session(model_path, providers, options)
|
||||
sess, second_creation_time = create_session(model_path, providers, options)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
update_fail_model_map(model_to_fail_ep, name, ep, 'runtime error', e)
|
||||
continue
|
||||
|
||||
if second_creation_time:
|
||||
model_to_session[name] = copy.deepcopy({ep + second: second_creation_time})
|
||||
|
||||
logger.info("start to inference {} with {} ...".format(name, ep))
|
||||
logger.info(sess.get_providers())
|
||||
|
|
@ -1091,6 +1098,8 @@ def run_onnxruntime(args, models):
|
|||
"device": ep,
|
||||
"fp16": fp16,
|
||||
"io_binding": args.io_binding,
|
||||
"graph_optimizations": args.graph_enablement,
|
||||
"enable_cache": args.enable_cache,
|
||||
"model_name": name,
|
||||
"inputs": len(sess.get_inputs()),
|
||||
"batch_size": batch_size,
|
||||
|
|
@ -1110,6 +1119,7 @@ def run_onnxruntime(args, models):
|
|||
continue
|
||||
|
||||
if result:
|
||||
|
||||
latency_result[ep] = {}
|
||||
latency_result[ep]["average_latency_ms"] = result["average_latency_ms"]
|
||||
latency_result[ep]["latency_90_percentile"] = result["latency_90_percentile"]
|
||||
|
|
@ -1241,9 +1251,7 @@ def output_details(results, csv_filename):
|
|||
|
||||
with open(csv_filename, mode="a", newline='') as csv_file:
|
||||
column_names = [
|
||||
"engine", "version", "device", "fp16", "io_binding", "model_name", "inputs", "batch_size",
|
||||
"sequence_length", "datetime", "test_times", "QPS", "average_latency_ms", "latency_variance",
|
||||
"latency_90_percentile", "latency_95_percentile", "latency_99_percentile"
|
||||
"engine", "version", "device", "fp16", "io_binding", "graph_optimizations", "enable_cache", "model_name", "inputs", "batch_size", "sequence_length", "datetime", "test_times", "QPS", "average_latency_ms", "latency_variance", "latency_90_percentile", "latency_95_percentile", "latency_99_percentile"
|
||||
]
|
||||
|
||||
csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
|
||||
|
|
@ -1374,10 +1382,9 @@ def output_session_creation(results, csv_filename):
|
|||
need_write_header = False
|
||||
|
||||
with open(csv_filename, mode="a", newline='') as csv_file:
|
||||
column_names = [model_title]
|
||||
for provider in ort_provider_list:
|
||||
column_names.append(provider + session_ending)
|
||||
|
||||
session_1 = [p + session_ending for p in ort_provider_list]
|
||||
session_2 = [p + second_session_ending for p in ort_provider_list]
|
||||
column_names = [model_title] + session_1 + session_2
|
||||
csv_writer = csv.writer(csv_file)
|
||||
|
||||
|
||||
|
|
@ -1391,6 +1398,11 @@ def output_session_creation(results, csv_filename):
|
|||
trt_fp32_time = ""
|
||||
cuda_fp16_time = ""
|
||||
trt_fp16_time = ""
|
||||
cpu_time_2 = ""
|
||||
cuda_fp32_time_2 = ""
|
||||
trt_fp32_time_2 = ""
|
||||
cuda_fp16_time_2 = ""
|
||||
trt_fp16_time_2 = ""
|
||||
|
||||
for model_name, ep_dict in results.items():
|
||||
for ep, time in ep_dict.items():
|
||||
|
|
@ -1404,6 +1416,16 @@ def output_session_creation(results, csv_filename):
|
|||
cuda_fp16_time = time
|
||||
elif ep == trt_fp16:
|
||||
trt_fp16_time = time
|
||||
if ep == cpu + second:
|
||||
cpu_time_2 = time
|
||||
elif ep == cuda + second:
|
||||
cuda_fp32_time_2 = time
|
||||
elif ep == trt + second:
|
||||
trt_fp32_time_2 = time
|
||||
elif ep == cuda_fp16 + second:
|
||||
cuda_fp16_time_2 = time
|
||||
elif ep == trt_fp16 + second:
|
||||
trt_fp16_time_2 = time
|
||||
else:
|
||||
continue
|
||||
|
||||
|
|
@ -1412,7 +1434,12 @@ def output_session_creation(results, csv_filename):
|
|||
cuda_fp32_time,
|
||||
trt_fp32_time,
|
||||
cuda_fp16_time,
|
||||
trt_fp16_time]
|
||||
trt_fp16_time,
|
||||
cpu_time_2,
|
||||
cuda_fp32_time_2,
|
||||
trt_fp32_time_2,
|
||||
cuda_fp16_time_2,
|
||||
trt_fp16_time_2]
|
||||
csv_writer.writerow(row)
|
||||
|
||||
|
||||
|
|
@ -1659,15 +1686,17 @@ def parse_arguments():
|
|||
|
||||
parser.add_argument("-w", "--workspace", required=False, default="/", help="Workspace to find tensorrt and perf script (with models if parsing with model file)")
|
||||
|
||||
parser.add_argument("--track_memory", required=False, default=True, help="Track CUDA and TRT Memory Usage")
|
||||
|
||||
parser.add_argument("--io_binding", required=False, default=False, help="Bind Inputs")
|
||||
parser.add_argument("-e", "--ep_list", nargs="+", required=False, default=None, help="Specify ORT Execution Providers list.")
|
||||
|
||||
parser.add_argument("--graph_enablement", required=False, default=enable_all, choices=[disable, basic, extended, enable_all], help="Choose graph optimization enablement.")
|
||||
parser.add_argument("-z", "--track_memory", required=False, default=True, help="Track CUDA and TRT Memory Usage")
|
||||
|
||||
parser.add_argument("--ep", required=False, default=None, help="Specify ORT Execution Provider.")
|
||||
parser.add_argument("-b", "--io_binding", required=False, default=False, help="Bind Inputs")
|
||||
|
||||
parser.add_argument("--ep_list", nargs="+", required=False, default=None, help="Specify ORT Execution Providers list.")
|
||||
parser.add_argument("-g", "--graph_enablement", required=False, default=enable_all, choices=[disable, basic, extended, enable_all], help="Choose graph optimization enablement.")
|
||||
|
||||
parser.add_argument("-n", "--enable_cache", required=False, default=True, help="Enable ORT-TRT Caching")
|
||||
|
||||
parser.add_argument("--ep", required=False, default=None, help="Specify ORT Execution Provider.")
|
||||
|
||||
parser.add_argument("--fp16", required=False, default=True, action="store_true", help="Inlcude Float16 into benchmarking.")
|
||||
|
||||
|
|
@ -1703,12 +1732,13 @@ def setup_logger(verbose):
|
|||
logging.getLogger("transformers").setLevel(logging.WARNING)
|
||||
|
||||
def parse_models_helper(args, models):
|
||||
if ".json" in args.model_source:
|
||||
model_source = os.path.join(args.workspace, args.model_source)
|
||||
if ".json" in model_source:
|
||||
logger.info("Parsing model information from file ...")
|
||||
parse_models_info_from_file(args.workspace, args.model_source, models)
|
||||
parse_models_info_from_file(args.workspace, model_source, models)
|
||||
else:
|
||||
logger.info("Parsing model information from directory ...")
|
||||
parse_models_info_from_directory(args.model_source, models)
|
||||
parse_models_info_from_directory(model_source, models)
|
||||
|
||||
def main():
|
||||
args = parse_arguments()
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ def main():
|
|||
|
||||
model_list_file = os.path.join(os.getcwd(), model +'.json')
|
||||
write_model_info_to_file([model_info], model_list_file)
|
||||
|
||||
|
||||
for ep in ep_list:
|
||||
|
||||
command = ["python3",
|
||||
|
|
@ -120,6 +120,7 @@ def main():
|
|||
output_metrics(model_to_metrics, os.path.join(path, benchmark_metrics_csv))
|
||||
logger.info("\nSaved model metrics results to {}".format(benchmark_metrics_csv))
|
||||
|
||||
elif args.running_mode == "benchmark":
|
||||
logger.info("\n=========================================")
|
||||
logger.info("======= Models/EPs session creation =======")
|
||||
logger.info("=========================================")
|
||||
|
|
@ -129,8 +130,7 @@ def main():
|
|||
pretty_print(pp, model_to_session)
|
||||
output_session_creation(model_to_session, os.path.join(path, benchmark_session_csv))
|
||||
logger.info("\nSaved session creation results to {}".format(benchmark_session_csv))
|
||||
|
||||
elif args.running_mode == "benchmark":
|
||||
|
||||
logger.info("\n=========================================================")
|
||||
logger.info("========== Failing Models/EPs (accumulated) ==============")
|
||||
logger.info("==========================================================")
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
#!/bin/bash
|
||||
|
||||
while getopts d:o:m:w:e: parameter
|
||||
while getopts d:o:m:e:a: parameter
|
||||
do case "${parameter}"
|
||||
in
|
||||
d) PERF_DIR=${OPTARG};;
|
||||
o) OPTION=${OPTARG};;
|
||||
m) MODEL_PATH=${OPTARG};;
|
||||
w) WORKSPACE=${OPTARG};;
|
||||
e) EP_LIST=${OPTARG};;
|
||||
a) OPTIONAL_ARGS=${OPTARG};;
|
||||
esac
|
||||
done
|
||||
|
||||
|
|
@ -15,7 +15,8 @@ done
|
|||
RUN_EPS=""
|
||||
if [ ! -z "$EP_LIST" ]
|
||||
then
|
||||
RUN_EPS="--ep_list $EP_LIST"
|
||||
RUN_EPS=" -e $EP_LIST"
|
||||
OPTIONAL_ARGS=$OPTIONAL_ARGS$RUN_EPS
|
||||
fi
|
||||
|
||||
# change dir if docker
|
||||
|
|
@ -54,5 +55,5 @@ setup() {
|
|||
}
|
||||
|
||||
setup
|
||||
python3 benchmark_wrapper.py -r validate -m $MODEL_PATH -o result/$OPTION -w $WORKSPACE $RUN_EPS
|
||||
python3 benchmark_wrapper.py -r benchmark -t 1200 -m $MODEL_PATH -o result/$OPTION -w $WORKSPACE $RUN_EPS
|
||||
python3 benchmark_wrapper.py -r validate -m $MODEL_PATH -o result/$OPTION $OPTIONAL_ARGS
|
||||
python3 benchmark_wrapper.py -r benchmark -t 1200 -m $MODEL_PATH -o result/$OPTION $OPTIONAL_ARGS
|
||||
|
|
|
|||
|
|
@ -41,11 +41,13 @@ model_title = 'Model'
|
|||
group_title = 'Group'
|
||||
|
||||
# endings
|
||||
second = "_second"
|
||||
csv_ending = '.csv'
|
||||
avg_ending = ' \nmean (ms)'
|
||||
percentile_ending = ' \n90th percentile (ms)'
|
||||
memory_ending = ' \npeak memory usage (MiB)'
|
||||
session_ending = ' \n session creation time (s)'
|
||||
second_session_ending = ' \n second session creation time (s)'
|
||||
ort_provider_list = [cpu, cuda, trt, cuda_fp16, trt_fp16]
|
||||
provider_list = [cpu, cuda, trt, standalone_trt, cuda_fp16, trt_fp16, standalone_trt_fp16]
|
||||
table_headers = [model_title] + provider_list
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ import os
|
|||
import pandas as pd
|
||||
import time
|
||||
from azure.kusto.data import KustoConnectionStringBuilder
|
||||
from azure.kusto.data.data_format import DataFormat
|
||||
from azure.kusto.data.helpers import dataframe_from_result_table
|
||||
from azure.kusto.ingest import (
|
||||
IngestionProperties,
|
||||
DataFormat,
|
||||
ReportLevel,
|
||||
QueuedIngestClient,
|
||||
)
|
||||
|
|
@ -91,7 +91,7 @@ def get_specs(specs, branch, commit_id, date_time):
|
|||
|
||||
def get_session(session, model_group):
|
||||
session_columns = session.keys()
|
||||
session_db_columns = [model_title] + ort_provider_list
|
||||
session_db_columns = [model_title] + ort_provider_list + [p + second for p in ort_provider_list]
|
||||
session = adjust_columns(session, session_columns, session_db_columns, model_group)
|
||||
return session
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Parse Arguments
|
||||
while getopts d:o:m:p:e:v: parameter
|
||||
while getopts d:o:m:p:e:v:a: parameter
|
||||
do case "${parameter}"
|
||||
in
|
||||
d) DOCKER_IMAGE=${OPTARG};;
|
||||
|
|
@ -10,12 +10,11 @@ m) MODEL_PATH=${OPTARG};;
|
|||
p) PERF_DIR=${OPTARG};;
|
||||
e) EP_LIST=${OPTARG};;
|
||||
v) MODEL_VOLUME=${OPTARG};;
|
||||
a) PERF_ARGUMENTS=${OPTARG};;
|
||||
esac
|
||||
done
|
||||
|
||||
# Variables
|
||||
DOCKER_PERF_DIR='/perf/'
|
||||
WORKSPACE='/'
|
||||
MODEL_PATH=$WORKSPACE$MODEL_PATH
|
||||
|
||||
docker run --gpus all -v $PERF_DIR:$DOCKER_PERF_DIR -v $MODEL_VOLUME/$OPTION:$DOCKER_PERF_DIR$OPTION $DOCKER_IMAGE /bin/bash $DOCKER_PERF_DIR'perf.sh' -d $DOCKER_PERF_DIR -o $OPTION -m $MODEL_PATH -w $WORKSPACE -e "$EP_LIST"
|
||||
docker run --gpus all -v $PERF_DIR:$DOCKER_PERF_DIR -v $MODEL_VOLUME/$OPTION:$DOCKER_PERF_DIR$OPTION $DOCKER_IMAGE /bin/bash $DOCKER_PERF_DIR'perf.sh' -d $DOCKER_PERF_DIR -o $OPTION -m $MODEL_PATH -e "$EP_LIST" "$PERF_ARGUMENTS"
|
||||
|
|
|
|||
|
|
@ -90,7 +90,10 @@ jobs:
|
|||
value: machine.sh
|
||||
|
||||
- name: with_arguments
|
||||
value: $(environment) -e "$(epList)"
|
||||
value: $(environment) -e "$(epList)"
|
||||
|
||||
- name: optional_arguments
|
||||
value: -a "-a -g $(optimizeGraph) -b $(bindInputs) -n $(enableCache)"
|
||||
|
||||
steps:
|
||||
|
||||
|
|
@ -127,7 +130,7 @@ jobs:
|
|||
displayName: 'Start Anubis Job'
|
||||
|
||||
- ${{ each option in parameters.ModelGroups }}:
|
||||
- script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_$(with_arguments) -o ${{option}} -m $(${{option}})'
|
||||
- script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_$(with_arguments) -o ${{option}} -m $(${{option}}) $(optional_arguments)'
|
||||
displayName: '${{option}} perf'
|
||||
workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue