[trt-ep-perf] Fix bug that suppresses latency gain reporting (#11321)

Fix bug that prevents EP perf script from reporting latency gain for TensorRT/CUDA
2026-05-22 22:01:08 +00:00 · 2022-05-17 14:00:52 -07:00 · 2022-05-17 14:00:52 -07:00 · 48efeca66c
commit 48efeca66c
parent 782f9e394d
1 changed files with 51 additions and 16 deletions
--- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py
+++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
@ -6,20 +6,55 @@ import logging
 import os
 import pprint
 import re
+import subprocess
 import sys
 import time
 import timeit
 from datetime import datetime

 import coloredlogs
-import numpy
 import numpy as np
-import pandas as pd
-from float16 import *
-from perf_utils import *
+from perf_utils import (
+    acl,
+    acl_ep,
+    avg_ending,
+    basic,
+    calculate_cuda_op_percentage,
+    calculate_trt_latency_percentage,
+    calculate_trt_op_percentage,
+    cpu,
+    cpu_ep,
+    cuda,
+    cuda_ep,
+    cuda_fp16,
+    disable,
+    enable_all,
+    extended,
+    get_output,
+    get_profile_metrics,
+    get_total_ops,
+    is_standalone,
+    memory_ending,
+    model_title,
+    ort_provider_list,
+    percentile_ending,
+    pretty_print,
+    provider_list,
+    second,
+    second_session_ending,
+    session_ending,
+    standalone_trt,
+    standalone_trt_fp16,
+    table_headers,
+    trt,
+    trt_ep,
+    trt_fp16,
+)

 import onnxruntime  # isort:skip
+import onnx  # isort:skip
 from onnx import numpy_helper  # isort:skip
+import pandas as pd  # isort:skip

 debug = False
 sys.path.append(".")
@ -164,15 +199,15 @@ def run_trt_standalone(trtexec, model_name, model_path, all_inputs_shape, fp16,

 def get_latency_result(runtimes, batch_size):
    latency_ms = sum(runtimes) / float(len(runtimes)) * 1000.0
-    latency_variance = numpy.var(runtimes, dtype=numpy.float64) * 1000.0
+    latency_variance = np.var(runtimes, dtype=np.float64) * 1000.0
    throughput = batch_size * (1000.0 / latency_ms)

    result = {
        "test_times": len(runtimes),
        "latency_variance": "{:.2f}".format(latency_variance),
-        "latency_90_percentile": "{:.2f}".format(numpy.percentile(runtimes, 90) * 1000.0),
-        "latency_95_percentile": "{:.2f}".format(numpy.percentile(runtimes, 95) * 1000.0),
-        "latency_99_percentile": "{:.2f}".format(numpy.percentile(runtimes, 99) * 1000.0),
+        "latency_90_percentile": "{:.2f}".format(np.percentile(runtimes, 90) * 1000.0),
+        "latency_95_percentile": "{:.2f}".format(np.percentile(runtimes, 95) * 1000.0),
+        "latency_99_percentile": "{:.2f}".format(np.percentile(runtimes, 99) * 1000.0),
        "average_latency_ms": "{:.2f}".format(latency_ms),
        "QPS": "{:.2f}".format(throughput),
    }
@ -1432,18 +1467,18 @@ def calculate_gain(value, ep1, ep2):

 def add_improvement_information(model_to_latency):
    for key, value in model_to_latency.items():
-        if "ORT-TRT" in value and "ORT-CUDA" in value:
+        if trt in value and cuda in value:
            gain = calculate_gain(value, trt, cuda)
            value[trt_cuda_gain] = "{:.2f} %".format(gain)
-            if trt_fp16 in value and cuda_fp16 in value:
-                gain = calculate_gain(value, trt_fp16, cuda_fp16)
-                value[trt_cuda_fp16_gain] = "{:.2f} %".format(gain)
-        if "ORT-TRT" in value and is_standalone(value):
+        if trt_fp16 in value and cuda_fp16 in value:
+            gain = calculate_gain(value, trt_fp16, cuda_fp16)
+            value[trt_cuda_fp16_gain] = "{:.2f} %".format(gain)
+        if trt in value and standalone_trt in value:
            gain = calculate_gain(value, trt, standalone_trt)
            value[trt_native_gain] = "{:.2f} %".format(gain)
-            if trt_fp16 in value and standalone_trt_fp16 in value:
-                gain = calculate_gain(value, trt_fp16, standalone_trt_fp16)
-                value[trt_native_fp16_gain] = "{:.2f} %".format(gain)
+        if trt_fp16 in value and standalone_trt_fp16 in value:
+            gain = calculate_gain(value, trt_fp16, standalone_trt_fp16)
+            value[trt_native_fp16_gain] = "{:.2f} %".format(gain)


 def output_details(results, csv_filename):