Setup EP Dashboard (#7321)

* setting up dashboard * posting to ort dashboard * creating separate docker file * including common deps * tracking latency over time
2026-07-13 18:08:13 +00:00 · 2021-05-11 10:33:39 -07:00 · 2021-05-11 10:33:39 -07:00 · 29172d8f54
commit 29172d8f54
parent ce8473a4ea
7 changed files with 257 additions and 95 deletions
--- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py
+++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
@ -45,8 +45,8 @@ ep_to_provider_list = {
 # latency gain headers 
 trt_cuda_gain = 'TRT_CUDA_gain(%)'
 trt_cuda_fp16_gain = 'TRT_CUDA_fp16_gain(%)'
-trt_native_gain = 'EP_Native_TRT_gain(%)'
-trt_native_fp16_gain = 'EP_Native_TRT_fp16_gain(%)'
+trt_native_gain = 'TRT_Standalone_gain(%)'
+trt_native_fp16_gain = 'TRT_Standalone_fp16_gain(%)'

 # metadata
 FAIL_MODEL_FILE = ".fail_model_map"
@ -55,7 +55,7 @@ METRICS_FILE = ".metrics_map"
 MEMORY_FILE = './temp_memory.csv'

 def run_trt_standalone(trtexec, model_path, ort_inputs, all_inputs_shape, fp16):
-    logger.info("running native trt")
+    logger.info("running standalone trt")
    model_path = "--onnx=" + model_path
    input_shape = []

@ -1019,17 +1019,18 @@ def run_onnxruntime(args, models):
                    "sequence_length": 1,
                    "datetime": str(datetime.now()),}
                    
-                # get standalone TensorRT perf
                if trt in ep and args.trtexec:
                    
+                    # get standalone TensorRT perf
                    try: 
+                        ep = standalone_trt_fp16 if fp16 else standalone_trt
+                        
                        if args.track_memory: 
-                                ep = standalone_trt_fp16 if fp16 else standalone_trt
-                                p = start_memory_tracking()            
-                                result = run_trt_standalone(args.trtexec, model_path, sess.get_inputs(), all_inputs_shape, fp16)
-                                mem_usage = end_memory_tracking(p, True)
-                                if result and mem_usage: 
-                                    result["memory"] = mem_usage
+                            p = start_memory_tracking()            
+                            result = run_trt_standalone(args.trtexec, model_path, sess.get_inputs(), all_inputs_shape, fp16)
+                            mem_usage = end_memory_tracking(p, True)
+                            if result and mem_usage: 
+                                result["memory"] = mem_usage

                        else: 
                            result = run_trt_standalone(args.trtexec, model_path, sess.get_inputs(), all_inputs_shape, fp16)
@ -1247,7 +1248,7 @@ def output_status(results, csv_filename):
                        standalone_trt + " fp32",
                        cuda + " fp16",
                        trt + " fp16",
-                        standalone_trt + "fp16"
+                        standalone_trt + " fp16"
                        ]

        csv_writer = csv.writer(csv_file)
@ -1300,30 +1301,30 @@ def output_latency(results, csv_filename):

    with open(csv_filename, mode="a", newline='') as csv_file:
        column_names = ["Model",
-                        "CPU \nmean (ms)",
-                        "CPU \n 90th percentile (ms)",
+                        "CPU fp32 \nmean (ms)",
+                        "CPU fp32 \n 90th percentile (ms)",
                        "CUDA fp32 \nmean (ms)",
                        "CUDA fp32 \n90th percentile (ms)",
-                        "CUDA EP fp32 \nmemory usage (MiB)",
+                        "CUDA EP fp32 \npeak memory usage (MiB)",
                        "TRT EP fp32 \nmean (ms)",
                        "TRT EP fp32 \n90th percentile (ms)",
-                        "TRT EP fp32 \nmemory usage (MiB)",
+                        "TRT EP fp32 \npeak memory usage (MiB)",
                        "Standalone TRT fp32 \nmean (ms)",
                        "Standalone TRT fp32 \n90th percentile (ms)",
-                        "Standalone TRT fp32 \nmemory usage (MiB)",
+                        "Standalone TRT fp32 \npeak memory usage (MiB)",
                        "TRT v CUDA EP fp32 \ngain (mean) (%)",
-                        "EP v Native TRT fp32 \ngain (mean) (%)",
+                        "EP v Standalone TRT fp32 \ngain (mean) (%)",
                        "CUDA fp16 \nmean (ms)",
                        "CUDA fp16 \n90th percentile (ms)",
-                        "CUDA EP fp16 \nmemory usage (MiB)",
+                        "CUDA EP fp16 \npeak memory usage (MiB)",
                        "TRT EP fp16 \nmean (ms)",
-                        "TRT EP fp16 \n90 percentile (ms)",
-                        "TRT EP fp16 \nmemory usage (MiB)",
+                        "TRT EP fp16 \n90th percentile (ms)",
+                        "TRT EP fp16 \npeak memory usage (MiB)",
                        "Standalone TRT fp16 \nmean (ms)",
                        "Standalone TRT fp16 \n90th percentile (ms)",
-                        "Standalone TRT fp16 \nmemory usage (MiB)",
+                        "Standalone TRT fp16 \npeak memory usage (MiB)",
                        "TRT v CUDA EP fp16 \ngain (mean) (%)", 
-                        "EP v Native TRT fp16 \ngain (mean) (%)"]
+                        "EP v Standalone TRT fp16 \ngain (mean) (%)"]
        csv_writer = csv.writer(csv_file)

        if need_write_header:
--- a/onnxruntime/python/tools/tensorrt/perf/build/build_image.sh
+++ b/onnxruntime/python/tools/tensorrt/perf/build/build_image.sh
@ -9,4 +9,4 @@ i) IMAGE_NAME=${OPTARG};;
 esac
 done 

-sudo docker build --no-cache -t $IMAGE_NAME --build-arg ONNXRUNTIME_BRANCH=$ORT_BRANCH -f $ORT_DOCKERFILE_PATH ..
+sudo docker build --no-cache -t $IMAGE_NAME --build-arg ONNXRUNTIME_BRANCH=$ORT_BRANCH -f $PERF_DOCKERFILE_PATH ..
--- a/onnxruntime/python/tools/tensorrt/perf/model_list.json
+++ b/onnxruntime/python/tools/tensorrt/perf/model_list.json
@ -68,49 +68,19 @@
    {
        "model_name": "squeezenet1.1-7",
        "working_directory": "./models/squeezenet1.1-7/",
-        "model_path": "./squeezenet1.1/._squeezenet1.1.onnx",
+        "model_path": "./squeezenet1.1/squeezenet1.1.onnx",
        "test_data_path": "./squeezenet1.1/"
    },
-    {
-        "model_name": "squeezenet1.0-3",
-        "working_directory": "./models/squeezenet1.0-3/",
-        "model_path": "./squeezenet/model.onnx",
-        "test_data_path": "./squeezenet/"
-    },
-    {
-        "model_name": "squeezenet1.0-6",
-        "working_directory": "./models/squeezenet1.0-6/",
-        "model_path": "./squeezenet/model.onnx",
-        "test_data_path": "./squeezenet/"
-    },
-    {
-        "model_name": "squeezenet1.0-7",
-        "working_directory": "./models/squeezenet1.0-7/",
-        "model_path": "./squeezenet/model.onnx",
-        "test_data_path": "./squeezenet/"
-    },
-    {
-        "model_name": "squeezenet1.0-8",
-        "working_directory": "./models/squeezenet1.0-8/",
-        "model_path": "./squeezenet/model.onnx",
-        "test_data_path": "./squeezenet/"
-    },
-    {
-        "model_name": "squeezenet1.0-9",
-        "working_directory": "./models/squeezenet1.0-9/",
-        "model_path": "./squeezenet/model.onnx",
-        "test_data_path": "./squeezenet/"
-    },
    {
        "model_name": "vgg16-7",
        "working_directory": "./models/vgg16-7/",
-        "model_path": "./vgg16/._vgg16.onnx",
+        "model_path": "./vgg16/vgg16.onnx",
        "test_data_path": "./vgg16/"
    },
    {
        "model_name": "vgg19-bn-7",
        "working_directory": "./models/vgg19-bn-7/",
-        "model_path": "./vgg19-bn/._vgg19-bn.onnx",
+        "model_path": "./vgg19-bn/vgg19-bn.onnx",
        "test_data_path": "./vgg19-bn/"
    },
    {
@ -127,18 +97,6 @@
    },
    {
-        "model_name": "caffenet-9",
-        "working_directory": "./models/caffenet-9/",
-        "model_path": "./bvlc_reference_caffenet/model.onnx",
-        "test_data_path": "./bvlc_reference_caffenet/"
-    },
-    {
-        "model_name": "rcnn-ilsvrc13-9",
-        "working_directory": "./models/rcnn-ilsvrc13-9/",
-        "model_path": "./bvlc_reference_rcnn_ilsvrc13/model.onnx",
-        "test_data_path": "./bvlc_reference_rcnn_ilsvrc13/"
-    },
-    {
        "model_name": "densenet-9",
        "working_directory": "./models/densenet-9/",
        "model_path": "./densenet121/model.onnx",
        "test_data_path": "./densenet121/"
@ -224,7 +182,7 @@
    {
        "model_name": "yolov4",
        "working_directory": "./models/yolov4/",
-        "model_path": "./yolov4/yolov4.onnx",
+        "model_path": "./yolov4/yolov4_shape.onnx",
        "test_data_path": "./yolov4/"
    },
    {
@ -251,12 +209,6 @@
        "model_path": "./GPT-2-LM-HEAD/model.onnx",
        "test_data_path": "./GPT-2-LM-HEAD/"
    },
-    {
-        "model_name": "gpt2-10",
-        "working_directory": "./models/gpt2-10/",
-        "model_path": "./GPT2/model.onnx",
-        "test_data_path": "./GPT2/"
-    },
    {
        "model_name": "zfnet512-9",
        "working_directory": "./models/zfnet512-9/",
@ -268,11 +220,5 @@
        "working_directory": "./models/arcfaceresnet100-8/",
        "model_path": "./resnet100/resnet100.onnx",
        "test_data_path": "./resnet100/"
-    },
-    {
-        "model_name": "mosaic-9",
-        "working_directory": "./models/mosaic-9/",
-        "model_path": "./mosaic/mosaic.onnx",
-        "test_data_path": "./mosaic/"
    }
 ]
--- a/onnxruntime/python/tools/tensorrt/perf/post.py
+++ b/onnxruntime/python/tools/tensorrt/perf/post.py
@ -0,0 +1,195 @@
+import argparse
+import mysql.connector
+import sys
+import os
+import subprocess
+import pandas as pd
+from sqlalchemy import create_engine
+
+# Database connection settings for the dashboard's MySQL server.
+# main() concatenates them into the SQLAlchemy engine URL and
+# insert_latency() hands them to mysql.connector.connect() as keyword arguments.
+sql_connector = 'mysql+mysqlconnector://'
+user='powerbi@onnxruntimedashboard'
+# Taken from the environment; os.environ.get() yields None when the variable is unset.
+password=os.environ.get('DASHBOARD_MYSQL_ORT_PASSWORD')
+host='onnxruntimedashboard.mysql.database.azure.com'
+database='onnxruntime'
+
+def parse_arguments():
+    """Parse the command line of the dashboard posting script.
+
+    Three options are accepted and all of them are required: the report
+    folder (-r), the commit id (-c) and the report url (-u).
+
+    Returns the argparse namespace with report_folder, commit_hash and
+    report_url attributes.
+    """
+    arg_parser = argparse.ArgumentParser()
+    required_options = (
+        ("-r", "--report_folder", "Path to the local file report"),
+        ("-c", "--commit_hash", "Commit id"),
+        ("-u", "--report_url", "Report Url"),
+    )
+    for short_flag, long_flag, description in required_options:
+        arg_parser.add_argument(short_flag, long_flag, help=description, required=True)
+
+    return arg_parser.parse_args()
+
+def parse_csv(report_file):
+    """Read one csv report with pandas and return the resulting DataFrame."""
+    return pd.read_csv(report_file)
+
+def insert_latency(commit_hash, report_url, latency):
+    """Append the current latency results to onnxruntime.ep_latency_over_time.
+
+    Rows whose UploadTime is more than 30 days old are deleted first. When
+    `latency` is not empty, its gain columns are dropped and the remaining
+    per-EP columns are melted into one (Model, Ep, Latency) row each, stamped
+    with the current local time, `commit_hash` and `report_url`, and inserted.
+
+    commit_hash -- commit id stored in the CommitId column
+    report_url  -- url stored in the ReportUrl column
+    latency     -- DataFrame with 'Model', 'Group', the four gain columns and
+                   one column per execution provider
+
+    Any database error propagates to the caller; the connection is closed in
+    every case.
+    """
+    import time
+
+    # connect to database
+    cnx = mysql.connector.connect(
+            user=user,
+            password=password,
+            host=host,
+            database=database)
+
+    try:
+        cursor = cnx.cursor()
+
+        # delete old records
+        delete_query = ('DELETE FROM onnxruntime.ep_latency_over_time '
+                        'WHERE UploadTime < DATE_SUB(Now(), INTERVAL 30 DAY);'
+                        )
+        cursor.execute(delete_query)
+
+        if not latency.empty:
+            print('posting latency over time results to dashboard')
+            to_drop = ['TrtGain-CudaFp32', 'EpGain-TrtFp32', 'TrtGain-CudaFp16', 'EpGain-TrtFp16']
+            over_time = latency.drop(to_drop, axis='columns')
+            over_time = over_time.melt(id_vars=['Model', 'Group'], var_name='Ep', value_name='Latency')
+
+            upload_time = time.strftime('%Y-%m-%d %H:%M:%S')
+            over_time = over_time.assign(UploadTime=upload_time)
+            over_time = over_time.assign(CommitId=commit_hash)
+            over_time = over_time.assign(ReportUrl=report_url)
+
+            # column order must match the column list of the INSERT below
+            over_time = over_time[['UploadTime', 'CommitId', 'Model', 'Ep', 'Latency', 'ReportUrl', 'Group']]
+            # missing latencies are sent as empty strings
+            over_time = over_time.fillna('')
+            rows = [tuple(row) for row in over_time.values.tolist()]
+
+            # insert current records; values are bound as parameters so that
+            # quotes in model names or urls cannot break the statement
+            insert_query = ('INSERT INTO onnxruntime.ep_latency_over_time '
+                            '(UploadTime, CommitId, Model, Ep, Latency, ReportUrl, ModelGroup) '
+                            'VALUES (%s, %s, %s, %s, %s, %s, %s)')
+            cursor.executemany(insert_query, rows)
+
+        cnx.commit()
+        cursor.close()
+    finally:
+        # runs on success and on failure, so the connection never leaks
+        cnx.close()
+
+def adjust_columns(table, columns, db_columns, model_group):
+    """Select `columns` from `table`, rename them and tag the model group.
+
+    The selected columns are relabelled positionally with `db_columns` and a
+    'Group' column holding `model_group` is appended. Returns the new table.
+    """
+    selected = table[columns]
+    renamed = selected.set_axis(db_columns, axis=1)
+    return renamed.assign(Group=model_group)
+
+def get_failures(fail, model_group):
+    """Relabel every column of a failure report with the dashboard column names.
+
+    All columns of `fail` are kept, in their existing order, and the
+    'Group' column is added by adjust_columns.
+    """
+    db_names = ['Model', 'Ep', 'ErrorType', 'ErrorMessage']
+    return adjust_columns(fail, fail.keys(), db_names, model_group)
+
+def get_memory(memory, model_group):
+    """Extract the peak memory columns of a latency report for the dashboard.
+
+    Each csv header on the left is selected and renamed to the database
+    column on the right; adjust_columns also adds the 'Group' column.
+    """
+    csv_to_db = {
+        'Model': 'Model',
+        'CUDA EP fp32 \npeak memory usage (MiB)': 'CudaFp32',
+        'TRT EP fp32 \npeak memory usage (MiB)': 'TrtFp32',
+        'Standalone TRT fp32 \npeak memory usage (MiB)': 'StandaloneFp32',
+        'CUDA EP fp16 \npeak memory usage (MiB)': 'CudaFp16',
+        'TRT EP fp16 \npeak memory usage (MiB)': 'TrtFp16',
+        'Standalone TRT fp16 \npeak memory usage (MiB)': 'StandaloneFp16',
+    }
+    return adjust_columns(memory, list(csv_to_db.keys()), list(csv_to_db.values()), model_group)
+
+def get_latency(latency, model_group):
+    """Extract the mean latency and gain columns of a latency report.
+
+    Each csv header on the left is selected and renamed to the database
+    column on the right; adjust_columns also adds the 'Group' column.
+    """
+    csv_to_db = {
+        'Model': 'Model',
+        'CPU fp32 \nmean (ms)': 'CpuFp32',
+        'CUDA fp32 \nmean (ms)': 'CudaEpFp32',
+        'TRT EP fp32 \nmean (ms)': 'TrtEpFp32',
+        'Standalone TRT fp32 \nmean (ms)': 'StandaloneFp32',
+        'TRT v CUDA EP fp32 \ngain (mean) (%)': 'TrtGain-CudaFp32',
+        'EP v Standalone TRT fp32 \ngain (mean) (%)': 'EpGain-TrtFp32',
+        'CUDA fp16 \nmean (ms)': 'CudaEpFp16',
+        'TRT EP fp16 \nmean (ms)': 'TrtEpFp16',
+        'Standalone TRT fp16 \nmean (ms)': 'StandaloneFp16',
+        'TRT v CUDA EP fp16 \ngain (mean) (%)': 'TrtGain-CudaFp16',
+        'EP v Standalone TRT fp16 \ngain (mean) (%)': 'EpGain-TrtFp16',
+    }
+    return adjust_columns(latency, list(csv_to_db.keys()), list(csv_to_db.values()), model_group)
+    
+def get_status(status, model_group):
+    """Relabel every column of a status report with the dashboard column names.
+
+    All columns of `status` are kept, in their existing order, and the
+    'Group' column is added by adjust_columns.
+    """
+    db_names = ['Model', 'CpuFp32', 'CudaEpFp32', 'TrtEpFp32', 'StandaloneFp32', 'CudaEpFp16', 'TrtEpFp16', 'StandaloneFp16']
+    return adjust_columns(status, status.keys(), db_names, model_group)
+
+def get_database_cert():
+    """Return the file name of the CA certificate used for the database connection.
+
+    The certificate is looked up in the current working directory and is
+    downloaded there with wget when it does not exist yet.
+
+    Raises subprocess.CalledProcessError when the download fails.
+    """
+    # NOTE(review): the local file name says Baltimore CyberTrust Root while the
+    # url serves DigiCert Global Root G2 -- confirm which root the server needs.
+    cert = 'BaltimoreCyberTrustRoot.crt.pem'
+    if not os.path.exists(cert):
+        try:
+            subprocess.run(["wget", "https://cacerts.digicert.com/DigiCertGlobalRootG2.crt.pem", "-O", cert], check=True)
+        except subprocess.CalledProcessError:
+            # wget -O may leave an empty or partial file behind; remove it so the
+            # next run downloads again instead of reusing a broken certificate
+            if os.path.exists(cert):
+                os.remove(cert)
+            raise
+    return cert
+
+def write_table(engine, table, table_name):
+    """Store `table` under `table_name`, replacing a table that already exists.
+
+    The DataFrame index is not written and rows are sent one per chunk.
+    """
+    table.to_sql(
+        name=table_name,
+        con=engine,
+        if_exists='replace',
+        index=False,
+        chunksize=1,
+    )
+
+def _concat_tables(tables):
+    """Stack a list of DataFrames into one; an empty list gives an empty DataFrame."""
+    if not tables:
+        return pd.DataFrame()
+    return pd.concat(tables, ignore_index=True)
+
+def main():
+    """Collect the csv reports of every model group and post them to the dashboard.
+
+    The report folder is expected to hold one sub-folder per model group.
+    Inside each sub-folder, files with "fail", "latency" or "status" in their
+    name are read; the combined tables replace the dashboard tables and the
+    latency results are appended to the latency-over-time table.
+
+    Exits with status 1 when the password is missing or any step fails.
+    """
+
+    # connect to database
+    if password is None:
+        print('DASHBOARD_MYSQL_ORT_PASSWORD is not set')
+        sys.exit(1)
+    cert = get_database_cert()
+    ssl_args = {'ssl_ca': cert}
+    # dialect+driver://user:password@host/database
+    connection_string = sql_connector + \
+                        user + ':' + \
+                        password + \
+                        '@' + host + \
+                        '/' + database
+    engine = create_engine(connection_string, connect_args=ssl_args)
+
+    try:
+        args = parse_arguments()
+        # absolute path, so a relative --report_folder keeps working while
+        # the sub-folders are visited
+        result_dir = os.path.abspath(args.report_folder)
+
+        fail_tables = []
+        memory_tables = []
+        latency_tables = []
+        status_tables = []
+
+        for model_group in os.listdir(result_dir):
+            group_dir = os.path.join(result_dir, model_group)
+            if not os.path.isdir(group_dir):
+                # only sub-folders are model groups
+                continue
+            for csv_name in os.listdir(group_dir):
+                table = parse_csv(os.path.join(group_dir, csv_name))
+                if "fail" in csv_name:
+                    fail_tables.append(get_failures(table, model_group))
+                if "latency" in csv_name:
+                    memory_tables.append(get_memory(table, model_group))
+                    latency_tables.append(get_latency(table, model_group))
+                if "status" in csv_name:
+                    status_tables.append(get_status(table, model_group))
+
+        fail = _concat_tables(fail_tables)
+        memory = _concat_tables(memory_tables)
+        latency = _concat_tables(latency_tables)
+        status = _concat_tables(status_tables)
+
+        print('writing failures over time to database')
+        write_table(engine, fail, 'ep_model_fails')
+        print('writing memory to database')
+        write_table(engine, memory, 'ep_model_memory')
+        print('writing latency to database')
+        write_table(engine, latency, 'ep_model_latency')
+        print('writing status to database')
+        write_table(engine, status, 'ep_models_status')
+        print('writing latency over time to database')
+        insert_latency(args.commit_hash, args.report_url, latency)
+
+    except Exception as e:
+        print(str(e))
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
--- a/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh
+++ b/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh
@ -1,36 +1,46 @@
 #!/bin/bash

 # Parse Arguments
-while getopts d:o:m: parameter
+while getopts d:o:m:p: parameter
 do case "${parameter}"
 in 
 d) DOCKER_IMAGE=${OPTARG};;
 o) OPTION=${OPTARG};;
 m) MODEL_PATH=${OPTARG};;
+p) PERF_DIR=${OPTARG};;
 esac
 done 

 # Variables
-MACHINE_PERF_DIR=/home/hcsuser/perf/
 DOCKER_PERF_DIR=/usr/share/perf/
 PERF_SCRIPT=$DOCKER_PERF_DIR'perf.sh'
-VOLUME=$MACHINE_PERF_DIR:$DOCKER_PERF_DIR
+VOLUME=$PERF_DIR:$DOCKER_PERF_DIR
+ONNX_ZOO_VOLUME=' -v /home/hcsuser/perf/models:/usr/share/perf/models'
+MANY_MODELS_VOLUME=' -v /home/hcsuser/mount/many-models:/usr/share/mount/many-models'
+PARTNER_VOLUME=' -v /home/hcsuser/perf/partner:/usr/share/perf/partner'

 # Add Remaining Variables
 if [ $OPTION == "onnx-zoo-models" ]
 then 
-    MODEL_PATH=model_list.json
+    MODEL_PATH='model_list.json'
+    VOLUME=$VOLUME$ONNX_ZOO_VOLUME
 fi 

 if [ $OPTION == "many-models" ]
 then 
    MODEL_PATH=/usr/share/mount/many-models
-    VOLUME=$VOLUME' -v /home/hcsuser/mount/many-models:/usr/share/mount/many-models'
+    VOLUME=$VOLUME$MANY_MODELS_VOLUME
 fi 

 if [ $OPTION == "partner-models" ]
 then 
-   MODEL_PATH=partner_model_list.json
+   MODEL_PATH='partner/partner_model_list.json'
+   VOLUME=$VOLUME$PARTNER_VOLUME
+fi
+
+if [ $OPTION == "selected-models" ]
+then	
+  VOLUME=$VOLUME$ONNX_ZOO_VOLUME$MANY_MODELS_VOLUME$PARTNER_VOLUME' -v /home/hcsuser/perf/subset_jsons/:/usr/share/perf/subset_jsons'
 fi

 sudo docker run --gpus all -v $VOLUME $DOCKER_IMAGE /bin/bash $PERF_SCRIPT -d $DOCKER_PERF_DIR -o $OPTION -m $MODEL_PATH
--- a/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py
+++ b/onnxruntime/python/tools/tensorrt/perf/setup_scripts/setup_onnx_zoo.py
@ -55,7 +55,7 @@ def get_model_info(link):
 def write_json(models): 
    model_json = json.dumps(models, indent=4) 
    with open('model_list.json', 'w') as fp: 
-        fp.write(models_json)
+        fp.write(model_json)

 def main():
    links = []
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-perf-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-perf-pipeline.yml
@ -4,26 +4,26 @@ jobs:
  variables:
    ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'
    branch: 'master'
-  timeoutInMinutes: 4000 
+  timeoutInMinutes: 3000 
  steps:
  
-    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build/build_image.sh -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build/Dockerfile.tensorrt-perf -b master -i ort-$(branch)'
+    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build/build_image.sh -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build/Dockerfile.tensorrt-perf -b $(branch) -i ort-$(branch)'
      displayName: 'Build latest ORT Images'
      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/build'

-    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d ort-$(branch) -o "onnx-zoo-models"'
+    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d ort-$(branch) -o "onnx-zoo-models" -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf'
      displayName: 'Onnx Zoo Models Perf'
      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'

-    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d  ort-$(branch) -o "many-models"'
+    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d  ort-$(branch) -o "many-models" -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf'
      displayName: 'Many Models Perf'
      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'

-    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d  ort-$(branch) -o "partner-models"'
+    - script: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/run_perf_docker.sh -d  ort-$(branch) -o "partner-models" -p $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf'
      displayName: 'Partner Models Perf'
      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'

-    - script: 'mkdir $(Build.SourcesDirectory)/Artifact && cp -r /home/hcsuser/perf/result/ $(Build.SourcesDirectory)/Artifact' 
+    - script: 'mkdir $(Build.SourcesDirectory)/Artifact && cp -r $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/result/ $(Build.SourcesDirectory)/Artifact'
      displayName: 'Prepare Artifacts'
      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'

@ -31,5 +31,15 @@ jobs:
      inputs:
        pathtoPublish: '$(Build.SourcesDirectory)/Artifact'
        artifactName: 'result'
+    

-    - template: templates/clean-agent-build-directory-step.yml
+    - script: 'python3 $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/post.py -r $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/result -c $(Build.SourceVersion) -u "https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=$(Build.BuildId)" ' 
+      displayName: 'Post to Dashboard'
+      workingDirectory: '$(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/'
+      env:
+        DASHBOARD_MYSQL_ORT_PASSWORD: $(dashboard-mysql-ort-password)
+
+    - script: sudo rm -rf $(Agent.BuildDirectory)
+      displayName: Clean build files (POSIX)
+      condition: not(eq(variables['Agent.OS'], 'Windows_NT')) # and always()
+      continueOnError: true  # continuing on error for this step, since linux build folder is somehow getting permission issue