From e45197fa8cc32310ca4e994d1fdb7304a5bc94b0 Mon Sep 17 00:00:00 2001
From: Adrian Lizarraga <adlizarraga@microsoft.com>
Date: Wed, 18 May 2022 15:36:21 -0700
Subject: [PATCH] [trt-ep-perf] Fix upload time of EP perf data (#11531)

Fix the post.py script to use the actual "upload time" in ISO format instead of the day/month/year of the commit date.
---
 .../python/tools/tensorrt/perf/post.py        | 243 ++++++++++++++----
 ...linux-gpu-tensorrt-daily-perf-pipeline.yml |   2 +-
 2 files changed, 193 insertions(+), 52 deletions(-)

diff --git a/onnxruntime/python/tools/tensorrt/perf/post.py b/onnxruntime/python/tools/tensorrt/perf/post.py
index f127f491b3..312409e822 100644
--- a/onnxruntime/python/tools/tensorrt/perf/post.py
+++ b/onnxruntime/python/tools/tensorrt/perf/post.py
@@ -1,32 +1,83 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
 import argparse
+import datetime
 import os
 import sys
-import time
 
 import pandas as pd
 from azure.kusto.data import KustoConnectionStringBuilder
 from azure.kusto.data.data_format import DataFormat
-from azure.kusto.data.helpers import dataframe_from_result_table
 from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
-from perf_utils import *
+from perf_utils import (
+    avg_ending,
+    cpu,
+    cuda,
+    cuda_fp16,
+    fail_name,
+    group_title,
+    latency_name,
+    latency_over_time_name,
+    memory_ending,
+    memory_name,
+    model_title,
+    ort_provider_list,
+    provider_list,
+    second,
+    session_name,
+    specs_name,
+    standalone_trt,
+    standalone_trt_fp16,
+    status_name,
+    table_headers,
+    trt,
+    trt_fp16,
+)
 
 # database connection strings
-cluster_ingest = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
-database = "ep_perf_dashboard"
+CLUSTER_INGEST = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
+DATABASE_NAME = "ep_perf_dashboard"
 
 
 def parse_arguments():
+    """
+    Parses command-line arguments and returns an object with each argument as a field.
+
+    :return: An object whose fields represent the parsed command-line arguments.
+    """
+
     parser = argparse.ArgumentParser()
     parser.add_argument("-r", "--report_folder", help="Path to the local file report", required=True)
-    parser.add_argument("-c", "--commit_hash", help="Commit id", required=True)
+    parser.add_argument("-c", "--commit_hash", help="Commit hash", required=True)
     parser.add_argument("-u", "--report_url", help="Report Url", required=True)
     parser.add_argument("-t", "--trt_version", help="Tensorrt Version", required=True)
     parser.add_argument("-b", "--branch", help="Branch", required=True)
-    parser.add_argument("-d", "--datetime", help="Commit Datetime", required=True)
+    parser.add_argument(
+        "-d",
+        "--commit_datetime",
+        help="Commit datetime in Python's datetime ISO 8601 format",
+        required=True,
+        type=datetime.datetime.fromisoformat,
+    )
+
     return parser.parse_args()
 
 
 def adjust_columns(table, columns, db_columns, model_group):
+    """
+    Utility function that replaces column names in an in-memory table with the appropriate database column names.
+    Additionly, this function adds a model group column to all rows in the table.
+
+    :param table: The Pandas table to adjust.
+    :param columns: A list of existing column names to rename.
+    :param db_columns: A list of databse columns names to use.
+    :param model_group: The model group to append as a column.
+
+    :return: The updated table.
+    """
+
     table = table[columns]
     table = table.set_axis(db_columns, axis=1)
     table = table.assign(Group=model_group)
@@ -34,28 +85,47 @@ def adjust_columns(table, columns, db_columns, model_group):
 
 
 def get_latency_over_time(commit_hash, report_url, branch, latency_table):
-    if not latency_table.empty:
-        over_time = latency_table
-        over_time = over_time.melt(id_vars=[model_title, group_title], var_name="Ep", value_name="Latency")
-        over_time = over_time.assign(CommitId=commit_hash)
-        over_time = over_time.assign(ReportUrl=report_url)
-        over_time = over_time.assign(Branch=branch)
-        over_time = over_time[
-            [
-                "CommitId",
-                model_title,
-                "Ep",
-                "Latency",
-                "ReportUrl",
-                group_title,
-                "Branch",
-            ]
+    """
+    Returns a new Pandas table with data that tracks the latency of model/EP inference runs over time.
+
+    :param commit_hash: The short git commit hash corresponding to the version of ORT used to gather latency data.
+    :param report_url: The URL of the Azure pipeline run/report which produced this latency data.
+    :param branch: The name of the git branch corresponding to the version of ORT used to gather latency data.
+    :param latency_table: The Pandas table containing raw "latency over time" data imported from a CSV file.
+
+    :return: The updated table.
+    """
+
+    over_time = latency_table
+    over_time = over_time.melt(id_vars=[model_title, group_title], var_name="Ep", value_name="Latency")
+    over_time = over_time.assign(CommitId=commit_hash)
+    over_time = over_time.assign(ReportUrl=report_url)
+    over_time = over_time.assign(Branch=branch)
+    over_time = over_time[
+        [
+            "CommitId",
+            model_title,
+            "Ep",
+            "Latency",
+            "ReportUrl",
+            group_title,
+            "Branch",
         ]
-        over_time.fillna("", inplace=True)
-        return over_time
+    ]
+    over_time.fillna("", inplace=True)
+    return over_time
 
 
 def get_failures(fail, model_group):
+    """
+    Returns a new Pandas table with data that tracks failed model/EP inference runs.
+
+    :param fail: The Pandas table containing raw failure data imported from a CSV file.
+    :param model_group: The model group namespace to append as a column.
+
+    :return: The updated table.
+    """
+
     fail_columns = fail.keys()
     fail_db_columns = [model_title, "Ep", "ErrorType", "ErrorMessage"]
     fail = adjust_columns(fail, fail_columns, fail_db_columns, model_group)
@@ -63,6 +133,15 @@ def get_failures(fail, model_group):
 
 
 def get_memory(memory, model_group):
+    """
+    Returns a new Pandas table with data that tracks peak memory usage per model/EP.
+
+    :param memory: The Pandas table containing raw memory usage data imported from a CSV file.
+    :param model_group: The model group namespace to append as a column.
+
+    :return: The updated table.
+    """
+
     memory_columns = [model_title]
     for provider in provider_list:
         if cpu not in provider:
@@ -81,6 +160,15 @@ def get_memory(memory, model_group):
 
 
 def get_latency(latency, model_group):
+    """
+    Returns a new Pandas table with data that tracks inference run latency per model/EP.
+
+    :param latency: The Pandas table containing raw latency data imported from a CSV file.
+    :param model_group: The model group namespace to append as a column.
+
+    :return: The updated table.
+    """
+
     latency_columns = [model_title]
     for provider in provider_list:
         latency_columns.append(provider + avg_ending)
@@ -90,19 +178,40 @@ def get_latency(latency, model_group):
 
 
 def get_status(status, model_group):
+    """
+    Returns a new Pandas table with data that tracks whether an EP can successfully run a particular model.
+
+    :param status: The Pandas table containing raw model/EP status data imported from a CSV file.
+    :param model_group: The model group namespace to append as a column.
+
+    :return: The updated table.
+    """
+
     status_columns = status.keys()
     status_db_columns = table_headers
     status = adjust_columns(status, status_columns, status_db_columns, model_group)
     return status
 
 
-def get_specs(specs, branch, commit_id, date_time):
+def get_specs(specs, branch, commit_hash, commit_datetime):
+    """
+    Returns a new Pandas table with data that tracks the configuration/specs/versions of the hardware and software
+    used to gather benchmarking data.
+
+    :param specs: The Pandas table containing raw specs data imported from a CSV file.
+    :param branch: The name of the git branch corresponding to the version of ORT used to gather data.
+    :param commit_hash: The short git commit hash corresponding to the version of ORT used to gather data.
+    :param commit_datetime: The git commit datetime corresponding to the version of ORT used to gather data.
+
+    :return: The updated table.
+    """
+
     init_id = int(specs.tail(1).get(".", 0)) + 1
     specs_additional = pd.DataFrame(
         {
             ".": [init_id, init_id + 1, init_id + 2],
             "Spec": ["Branch", "CommitId", "CommitTime"],
-            "Version": [branch, commit_id, date_time],
+            "Version": [branch, commit_hash, str(commit_datetime)],
         }
     )
 
@@ -110,19 +219,41 @@ def get_specs(specs, branch, commit_id, date_time):
 
 
 def get_session(session, model_group):
+    """
+    Returns a new Pandas table with data that tracks the ORT session creation time for each model/EP combination.
+
+    :param session: The Pandas table containing raw model/EP session timing data imported from a CSV file.
+    :param model_group: The model group namespace to append as a column.
+
+    :return: The updated table.
+    """
+
     session_columns = session.keys()
     session_db_columns = [model_title] + ort_provider_list + [p + second for p in ort_provider_list]
     session = adjust_columns(session, session_columns, session_db_columns, model_group)
     return session
 
 
-def write_table(ingest_client, table, table_name, commit_time, identifier):
+def write_table(ingest_client, table, table_name, upload_time, identifier):
+    """
+    Uploads the provided table to the database. This function also appends the upload time and unique run identifier
+    to the table.
+
+    :param ingest_client: An instance of QueuedIngestClient used to initiate data ingestion.
+    :param table: The Pandas table to ingest.
+    :param table_name: The name of the table in the database.
+    :param upload_time: A datetime object denoting the data's upload time.
+    :param identifier: An identifier that associates the uploaded data with an ORT commit/date/branch.
+    """
+
     if table.empty:
         return
-    table = table.assign(UploadTime=commit_time)  # add Commit DateTime
-    table = table.assign(Identifier=identifier)  # add Identifier
+
+    # Add upload time and identifier columns to data table.
+    table = table.assign(UploadTime=str(upload_time))
+    table = table.assign(Identifier=identifier)
     ingestion_props = IngestionProperties(
-        database=database,
+        database=DATABASE_NAME,
         table=table_name,
         data_format=DataFormat.CSV,
         report_level=ReportLevel.FailuresAndSuccesses,
@@ -131,25 +262,34 @@ def write_table(ingest_client, table, table_name, commit_time, identifier):
     ingest_client.ingest_from_dataframe(table, ingestion_properties=ingestion_props)
 
 
-def get_time():
-    date_time = time.strftime(time_string_format)
-    return date_time
+def get_identifier(commit_datetime, commit_hash, trt_version, branch):
+    """
+    Returns an identifier that associates uploaded data with an ORT commit/date/branch and a TensorRT version.
 
+    :param commit_datetime: The datetime of the ORT commit used to run the benchmarks.
+    :param commit_hash: The hash of the ORT commit used to run the benchmarks.
+    :param trt_version: The TensorRT version used to run the benchmarks.
+    :param branch: The name of the ORT branch used to run the benchmarks.
 
-def get_identifier(date_time, commit_id, trt_version, branch):
-    date = date_time.split("T")[0]  # extract date only
-    return date + "_" + commit_id + "_" + trt_version + "_" + branch
+    :return: A string identifier.
+    """
+
+    date = str(commit_datetime.date())  # extract date only
+    return date + "_" + commit_hash + "_" + trt_version + "_" + branch
 
 
 def main():
+    """
+    Entry point of this script. Uploads data produced by benchmarking scripts to the database.
+    """
 
     args = parse_arguments()
 
     # connect to database
-    kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication(cluster_ingest)
+    kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication(CLUSTER_INGEST)
     ingest_client = QueuedIngestClient(kcsb_ingest)
-    date_time = args.datetime
-    identifier = get_identifier(date_time, args.commit_hash, args.trt_version, args.branch)
+    identifier = get_identifier(args.commit_datetime, args.commit_hash, args.trt_version, args.branch)
+    upload_time = datetime.datetime.now(tz=datetime.timezone.utc).replace(microsecond=0)
 
     try:
         result_file = args.report_folder
@@ -181,7 +321,7 @@ def main():
                     )
                 elif specs_name in csv:
                     table_results[specs_name] = table_results[specs_name].append(
-                        get_specs(table, args.branch, args.commit_hash, date_time),
+                        get_specs(table, args.branch, args.commit_hash, args.commit_datetime),
                         ignore_index=True,
                     )
                 elif fail_name in csv:
@@ -195,15 +335,16 @@ def main():
                     table_results[latency_name] = table_results[latency_name].append(
                         get_latency(table, model_group), ignore_index=True
                     )
-                    table_results[latency_over_time_name] = table_results[latency_over_time_name].append(
-                        get_latency_over_time(
-                            args.commit_hash,
-                            args.report_url,
-                            args.branch,
-                            table_results[latency_name],
-                        ),
-                        ignore_index=True,
-                    )
+                    if not table_results[latency_name].empty:
+                        table_results[latency_over_time_name] = table_results[latency_over_time_name].append(
+                            get_latency_over_time(
+                                args.commit_hash,
+                                args.report_url,
+                                args.branch,
+                                table_results[latency_name],
+                            ),
+                            ignore_index=True,
+                        )
                 elif status_name in csv:
                     table_results[status_name] = table_results[status_name].append(
                         get_status(table, model_group), ignore_index=True
@@ -216,7 +357,7 @@ def main():
                 ingest_client,
                 table_results[table],
                 db_table_name,
-                date_time,
+                upload_time,
                 identifier,
             )
 
diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml
index 7dc0590ce4..504ee5a952 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml
@@ -181,7 +181,7 @@ jobs:
           scriptType: bash
           inlineScript: |
             short_hash=$(git rev-parse --short HEAD) &&
-            commit_date=$(git log -1 --date=short --pretty=format:%cd) &&
+            commit_date=$(git log -1 --date=iso-strict --pretty=format:%cd) &&
             python3 $(Build.SourcesDirectory)/onnxruntime/python/tools/tensorrt/perf/post.py -r $(Build.SourcesDirectory)/Artifact/result -c $short_hash -d $commit_date -u "$(reportUrl)?buildId=$(Build.BuildId)" -t ${{ parameters.TrtVersion }} -b $(branch)
     
     - template: templates/component-governance-component-detection-steps.yml