mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-20 21:40:57 +00:00
Add Anubis metrics schema for local benchmark results uploading (#19018)
### Description 1. Add metrics.py to define the metrics schema used by Anubis 2. Add two examples (llama2 and whisper) of how to save local benchmark results following the Anubis metrics schema ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> --------- Co-authored-by: Kyle Zhang <Xi.Zhang@microsoft.com> Co-authored-by: ironman <bitzhangxi@outlook.com>
This commit is contained in:
parent
46dd0d3f52
commit
e2c145d37f
3 changed files with 300 additions and 6 deletions
164
onnxruntime/python/tools/transformers/metrics.py
Normal file
164
onnxruntime/python/tools/transformers/metrics.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See License.txt in the project root for
|
||||
# license information.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
import datetime
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class BaseObject:
    """Base class for schema sections that can be exported as a plain dict.

    Every instance carries a ``customized`` dict; its entries are merged on top
    of the regular instance attributes when the object is exported.
    """

    def __init__(self):
        self.customized = {}

    def to_dict(self):
        """Return the instance attributes merged with ``customized`` as a dict.

        The ``customized`` attribute itself is not emitted; its entries are
        added (and override attributes of the same name). Values that are
        ``BaseObject`` instances are replaced by their own ``to_dict()``
        result. Entries whose final value is falsy (``None``, ``0``, ``False``,
        empty containers, ...) are left out.
        """
        merged = {name: value for name, value in vars(self).items() if name != "customized"}
        merged.update(self.customized)

        exported = {}
        for name, value in merged.items():
            if isinstance(value, BaseObject):
                # Nested sections are exported recursively.
                value = value.to_dict()
            if value:
                exported[name] = value
        return exported
|
||||
|
||||
|
||||
class ModelInfo(BaseObject):
    """Model description section; ``Config`` creates one as ``model_info``.

    ``input_shape`` is not a constructor argument and always starts as an
    empty list.
    """

    def __init__(
        self,
        full_name: Optional[str] = None,
        is_huggingface: Optional[bool] = False,
        is_text_generation: Optional[bool] = False,
        short_name: Optional[str] = None,
    ):
        super().__init__()
        # Targets are assigned left to right; the order is kept because the
        # instance __dict__ order drives the key order of to_dict().
        (
            self.full_name,
            self.is_huggingface,
            self.is_text_generation,
            self.short_name,
            self.input_shape,
        ) = (full_name, is_huggingface, is_text_generation, short_name, [])
|
||||
|
||||
|
||||
class BackendOptions(BaseObject):
    """Backend option section; ``Config`` creates one as ``backend_options``."""

    def __init__(
        self,
        enable_profiling: Optional[bool] = False,
        execution_provider: Optional[str] = None,
        use_io_binding: Optional[bool] = False,
    ):
        super().__init__()
        # The dict literal preserves insertion order, so attributes are set in
        # the same sequence as the constructor parameters.
        options = {
            "enable_profiling": enable_profiling,
            "execution_provider": execution_provider,
            "use_io_binding": use_io_binding,
        }
        for attr_name, attr_value in options.items():
            setattr(self, attr_name, attr_value)
|
||||
|
||||
|
||||
class Config(BaseObject):
    """Benchmark configuration section.

    Stores the scalar run settings passed to the constructor and owns two
    nested sections created with their defaults: ``model_info``
    (``ModelInfo``) and ``backend_options`` (``BackendOptions``).
    """

    def __init__(
        self,
        backend: Optional[str] = "onnxruntime",
        batch_size: Optional[int] = 1,
        seq_length: Optional[int] = 0,
        precision: Optional[str] = "fp32",
        warmup_runs: Optional[int] = 1,
        measured_runs: Optional[int] = 10,
    ):
        super().__init__()

        # Scalar settings first, in parameter order (this is also the order in
        # which they appear in to_dict()).
        scalar_settings = (
            ("backend", backend),
            ("batch_size", batch_size),
            ("seq_length", seq_length),
            ("precision", precision),
            ("warmup_runs", warmup_runs),
            ("measured_runs", measured_runs),
        )
        for setting_name, setting_value in scalar_settings:
            setattr(self, setting_name, setting_value)

        # Nested sections start out with default values.
        self.model_info = ModelInfo()
        self.backend_options = BackendOptions()
|
||||
|
||||
|
||||
class Metadata(BaseObject):
    """Environment metadata section: device, package name/version, platform
    and Python version. All fields default to ``None``."""

    def __init__(
        self,
        device: Optional[str] = None,
        package_name: Optional[str] = None,
        package_version: Optional[str] = None,
        platform: Optional[str] = None,
        python_version: Optional[str] = None,
    ):
        super().__init__()
        field_names = ("device", "package_name", "package_version", "platform", "python_version")
        field_values = (device, package_name, package_version, platform, python_version)
        # Assign in declaration order so the exported key order is unchanged.
        for field_name, field_value in zip(field_names, field_values):
            setattr(self, field_name, field_value)
|
||||
|
||||
|
||||
class Metrics(BaseObject):
    """Measured results section with three standard fields, each defaulting
    to ``0.0``: ``latency_ms_mean``, ``throughput_qps`` and
    ``max_memory_usage_GB``."""

    def __init__(
        self,
        latency_ms_mean: Optional[float] = 0.0,
        throughput_qps: Optional[float] = 0.0,
        max_memory_usage_GB: Optional[float] = 0.0,
    ):
        super().__init__()
        # Left-to-right target assignment keeps the original attribute order.
        self.latency_ms_mean, self.throughput_qps, self.max_memory_usage_GB = (
            latency_ms_mean,
            throughput_qps,
            max_memory_usage_GB,
        )
|
||||
|
||||
|
||||
class BenchmarkRecord:
    """One benchmark result: a ``Config``, ``Metrics`` and ``Metadata`` section
    plus the date the benchmark was triggered.

    The sections are exposed as the attributes ``config``, ``metrics`` and
    ``metadata`` so callers can fill in further fields after construction.
    """

    def __init__(
        self,
        model_name: str,
        precision: str,
        backend: str,
        device: str,
        package_name: str,
        package_version: str,
        batch_size: Optional[int] = 1,
        warmup_runs: Optional[int] = 1,
        measured_runs: Optional[int] = 10,
        trigger_date: Optional[str] = None,
    ):
        """Create a record from the identifying fields of a benchmark run.

        Args:
            model_name: Stored as ``config.model_info.full_name``.
            precision: Stored as ``config.precision``.
            backend: Stored as ``config.backend``.
            device: Stored as ``metadata.device``.
            package_name: Stored as ``metadata.package_name``.
            package_version: Stored as ``metadata.package_version``.
            batch_size: Stored as ``config.batch_size``.
            warmup_runs: Stored as ``config.warmup_runs``.
            measured_runs: Stored as ``config.measured_runs``.
            trigger_date: Timestamp string; when falsy, the current local time
                formatted as ``%Y-%m-%d %H:%M:%S`` is used.
        """
        self.config = Config(
            backend=backend,
            batch_size=batch_size,
            precision=precision,
            warmup_runs=warmup_runs,
            measured_runs=measured_runs,
        )
        self.metrics = Metrics()
        self.metadata = Metadata(
            device=device,
            package_name=package_name,
            package_version=package_version,
        )

        if not trigger_date:
            trigger_date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.trigger_date = trigger_date

        self.config.model_info.full_name = model_name

    def to_dict(self) -> dict:
        """Return the record as a nested dict with the keys ``config``,
        ``metadata``, ``metrics`` and ``trigger_date``."""
        exported = {}
        exported["config"] = self.config.to_dict()
        exported["metadata"] = self.metadata.to_dict()
        exported["metrics"] = self.metrics.to_dict()
        exported["trigger_date"] = self.trigger_date
        return exported

    def to_json(self) -> str:
        """Serialize ``to_dict()`` to a JSON string; values the encoder cannot
        handle are converted with ``str``."""
        payload = self.to_dict()
        return json.dumps(payload, default=str)

    @classmethod
    def save_as_csv(cls, file_name: str, records: list) -> None:
        """Write ``records`` to ``file_name`` as CSV without an index column.

        Each record's nested dict is flattened with ``pandas.json_normalize``.
        Nothing is written when ``records`` is ``None`` or empty.
        """
        if records is not None and len(records) > 0:
            exported_rows = [item.to_dict() for item in records]
            pd.json_normalize(exported_rows).to_csv(file_name, index=False)

    @classmethod
    def save_as_json(cls, file_name: str, records: list) -> None:
        """Write ``records`` to ``file_name`` as an indented JSON list.

        Nothing is written when ``records`` is ``None`` or empty.
        """
        if records is not None and len(records) > 0:
            # Export first, then open the file, so a failing to_dict() does not
            # leave a truncated output file behind.
            exported_rows = [item.to_dict() for item in records]
            with open(file_name, "w") as out_file:
                json.dump(exported_rows, out_file, indent=4, default=str)
|
||||
|
|
@ -7,6 +7,7 @@ import subprocess
|
|||
|
||||
import torch
|
||||
from benchmark_helper import setup_logger
|
||||
from metrics import BenchmarkRecord
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -121,11 +122,19 @@ def get_args():
|
|||
help="Number of mins to attempt the benchmark before moving on",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-folder",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to folder to save logs and results",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-")) # noqa: B010
|
||||
log_folder_name = f"./{args.model_size}_{args.precision}"
|
||||
setattr(args, "log_folder", log_folder_name) # noqa: B010
|
||||
if not args.log_folder:
|
||||
args.log_folder = log_folder_name
|
||||
os.makedirs(args.log_folder, exist_ok=True)
|
||||
|
||||
# Convert timeout value to secs
|
||||
|
|
@ -197,6 +206,9 @@ def save_results(results, filename):
|
|||
df = pd.DataFrame(
|
||||
results,
|
||||
columns=[
|
||||
"Warmup Runs",
|
||||
"Measured Runs",
|
||||
"Model Name",
|
||||
"Engine",
|
||||
"Precision",
|
||||
"Device",
|
||||
|
|
@ -211,6 +223,8 @@ def save_results(results, filename):
|
|||
)
|
||||
|
||||
# Set column types
|
||||
df["Warmup Runs"] = df["Warmup Runs"].astype("int")
|
||||
df["Measured Runs"] = df["Measured Runs"].astype("int")
|
||||
df["Batch Size"] = df["Batch Size"].astype("int")
|
||||
df["Sequence Length"] = df["Sequence Length"].astype("int")
|
||||
df["Latency (s)"] = df["Latency (s)"].astype("float")
|
||||
|
|
@ -218,7 +232,52 @@ def save_results(results, filename):
|
|||
df["Throughput (tps)"] = df["Throughput (tps)"].astype("float")
|
||||
df["Memory (GB)"] = df["Memory (GB)"].astype("float")
|
||||
|
||||
df.to_csv(filename, index=False)
|
||||
# get package name and version
|
||||
import pkg_resources
|
||||
|
||||
installed_packages = pkg_resources.working_set
|
||||
installed_packages_list = sorted(
|
||||
[
|
||||
f"{i.key}=={i.version}"
|
||||
for i in installed_packages
|
||||
if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]
|
||||
]
|
||||
)
|
||||
|
||||
ort_pkg_name = ""
|
||||
ort_pkg_version = ""
|
||||
if installed_packages_list:
|
||||
ort_pkg_name = installed_packages_list[0].split("==")[0]
|
||||
ort_pkg_version = installed_packages_list[0].split("==")[1]
|
||||
|
||||
# Save results to csv with standard format
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
if row["Engine"] == "optimum-ort":
|
||||
record = BenchmarkRecord(
|
||||
row["Model Name"], row["Precision"], "onnxruntime", row["Device"], ort_pkg_name, ort_pkg_version
|
||||
)
|
||||
elif row["Engine"] in ["pytorch-eager", "pytorch-compile"]:
|
||||
record = BenchmarkRecord(
|
||||
row["Model Name"], row["Precision"], "pytorch", row["Device"], torch.__name__, torch.__version__
|
||||
)
|
||||
else:
|
||||
record = BenchmarkRecord(row["Model Name"], row["Precision"], row["Engine"], row["Device"], "", "")
|
||||
record.config.warmup_runs = row["Warmup Runs"]
|
||||
record.config.measured_runs = row["Measured Runs"]
|
||||
record.config.batch_size = row["Batch Size"]
|
||||
record.config.seq_length = row["Sequence Length"]
|
||||
record.config.customized["measure_step"] = row["Step"]
|
||||
record.config.customized["engine"] = row["Engine"]
|
||||
record.metrics.customized["latency_s_mean"] = row["Latency (s)"]
|
||||
record.metrics.latency_ms_mean = row["Latency (ms)"]
|
||||
record.metrics.customized["throughput_tps"] = row["Throughput (tps)"]
|
||||
record.metrics.max_memory_usage_GB = row["Memory (GB)"]
|
||||
|
||||
records.append(record)
|
||||
|
||||
BenchmarkRecord.save_as_csv(filename, records)
|
||||
BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
|
||||
logger.info(f"Results saved in {filename}!")
|
||||
|
||||
|
||||
|
|
@ -234,7 +293,7 @@ def benchmark(args, benchmark_cmd, engine):
|
|||
|
||||
# Create entries for csv
|
||||
logger.info("Gathering data from log files...")
|
||||
base_results = [engine, args.precision, args.device]
|
||||
base_results = [args.warmup_runs, args.num_runs, args.model_name, engine, args.precision, args.device]
|
||||
results = process_log_file(args.device_id, log_path, base_results)
|
||||
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import subprocess
|
|||
import librosa
|
||||
import torch
|
||||
from benchmark_helper import setup_logger
|
||||
from metrics import BenchmarkRecord
|
||||
from transformers import WhisperConfig, WhisperProcessor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -123,13 +124,21 @@ def get_args():
|
|||
help="Number of mins to attempt the benchmark before moving on",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--log-folder",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to folder to save logs and results",
|
||||
)
|
||||
|
||||
parser.add_argument("--tune", default=False, action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-")) # noqa: B010
|
||||
log_folder_name = f"./{args.model_size}-{args.precision}"
|
||||
setattr(args, "log_folder", log_folder_name) # noqa: B010
|
||||
if not args.log_folder:
|
||||
args.log_folder = log_folder_name
|
||||
os.makedirs(args.log_folder, exist_ok=True)
|
||||
|
||||
# Convert timeout value to secs
|
||||
|
|
@ -235,6 +244,9 @@ def save_results(results, filename):
|
|||
df = pd.DataFrame(
|
||||
results,
|
||||
columns=[
|
||||
"Warmup Runs",
|
||||
"Measured Runs",
|
||||
"Model Name",
|
||||
"Engine",
|
||||
"Precision",
|
||||
"Device",
|
||||
|
|
@ -254,6 +266,8 @@ def save_results(results, filename):
|
|||
)
|
||||
|
||||
# Set column types
|
||||
df["Warmup Runs"] = df["Warmup Runs"].astype("int")
|
||||
df["Measured Runs"] = df["Measured Runs"].astype("int")
|
||||
df["Duration (s)"] = df["Duration (s)"].astype("float")
|
||||
df["Token Length"] = df["Token Length"].astype("int")
|
||||
df["Load Audio Latency (s)"] = df["Load Audio Latency (s)"].astype("float")
|
||||
|
|
@ -266,7 +280,55 @@ def save_results(results, filename):
|
|||
df["Memory (GB)"] = df["Memory (GB)"].astype("float")
|
||||
df["Real Time Factor (RTF)"] = df["Real Time Factor (RTF)"].astype("float")
|
||||
|
||||
df.to_csv(filename, index=False)
|
||||
# get package name and version
|
||||
import pkg_resources
|
||||
|
||||
installed_packages = pkg_resources.working_set
|
||||
installed_packages_list = sorted(
|
||||
[
|
||||
f"{i.key}=={i.version}"
|
||||
for i in installed_packages
|
||||
if i.key in ["ort-nightly-gpu", "ort-nightly", "onnxruntime", "onnxruntime-gpu"]
|
||||
]
|
||||
)
|
||||
ort_pkg_name = ""
|
||||
ort_pkg_version = ""
|
||||
if installed_packages_list:
|
||||
ort_pkg_name = installed_packages_list[0].split("==")[0]
|
||||
ort_pkg_version = installed_packages_list[0].split("==")[1]
|
||||
|
||||
# Save results to csv with standard format
|
||||
records = []
|
||||
for _, row in df.iterrows():
|
||||
if row["Engine"] == "onnxruntime":
|
||||
record = BenchmarkRecord(
|
||||
row["Model Name"], row["Precision"], row["Engine"], row["Device"], ort_pkg_name, ort_pkg_version
|
||||
)
|
||||
else:
|
||||
record = BenchmarkRecord(
|
||||
row["Model Name"], row["Precision"], row["Engine"], row["Device"], torch.__name__, torch.__version__
|
||||
)
|
||||
record.config.customized["audio_file"] = row["Audio File"]
|
||||
record.config.warmup_runs = row["Warmup Runs"]
|
||||
record.config.measured_runs = row["Measured Runs"]
|
||||
|
||||
record.metrics.customized["duration"] = row["Duration (s)"]
|
||||
record.metrics.customized["token_length"] = row["Token Length"]
|
||||
record.metrics.customized["load_audio_latency"] = row["Load Audio Latency (s)"]
|
||||
record.metrics.customized["load_audio_throughput"] = row["Load Audio Throughput (qps)"]
|
||||
record.metrics.customized["feature_extractor_latency_s"] = row["Feature Extractor Latency (s)"]
|
||||
record.metrics.customized["feature_extractor_throughput_qps"] = row["Feature Extractor Throughput (qps)"]
|
||||
record.metrics.customized["per_token_latency_ms"] = row["Per Token Latency (ms/token)"]
|
||||
record.metrics.customized["rtf"] = row["Real Time Factor (RTF)"]
|
||||
|
||||
record.metrics.latency_ms_mean = row["Latency (s)"] * 1000
|
||||
record.metrics.throughput_qps = row["Throughput (qps)"]
|
||||
record.metrics.max_memory_usage_GB = row["Memory (GB)"]
|
||||
|
||||
records.append(record)
|
||||
|
||||
BenchmarkRecord.save_as_csv(filename, records)
|
||||
BenchmarkRecord.save_as_json(filename.replace(".csv", ".json"), records)
|
||||
logger.info(f"Results saved in {filename}!")
|
||||
|
||||
|
||||
|
|
@ -282,7 +344,16 @@ def benchmark(args, benchmark_cmd, engine, audio_file, duration):
|
|||
|
||||
# Create entries for csv
|
||||
logger.info("Gathering data from log files...")
|
||||
base_results = [engine, args.precision, args.device, audio_file, duration]
|
||||
base_results = [
|
||||
args.warmup_runs,
|
||||
args.num_runs,
|
||||
args.model_name,
|
||||
engine,
|
||||
args.precision,
|
||||
args.device,
|
||||
audio_file,
|
||||
duration,
|
||||
]
|
||||
results = process_log_file(args.device_id, log_path, base_results)
|
||||
|
||||
return results
|
||||
|
|
|
|||
Loading…
Reference in a new issue