mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-29 23:06:41 +00:00
111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
from abc import ABC, abstractmethod
|
|
from argparse import ArgumentParser
|
|
import time
|
|
import numpy
|
|
import onnxruntime as ort
|
|
import torch
|
|
|
|
|
|
def numpy_type(torch_type):
|
|
type_map = {torch.float32: numpy.float32,
|
|
torch.float16: numpy.float16}
|
|
return type_map[torch_type]
|
|
|
|
|
|
def add_arguments(parser: ArgumentParser):
|
|
parser.add_argument("--provider", required=False, type=str, default="rocm", help="Execution provider to use")
|
|
parser.add_argument("--precision", required=False, type=str, default="fp16", help="Number format to use")
|
|
parser.add_argument('--profiling', type=bool, default=False, help='If enable profiling')
|
|
|
|
|
|
class Benchmark:
|
|
def __init__(self, model, inputs, outputs, args):
|
|
self.provider = args.provider
|
|
self.profiling = args.profiling
|
|
self.model = model
|
|
self.inputs = inputs
|
|
self.outputs = outputs
|
|
|
|
def create_input_output_tensors(self):
|
|
device = "cuda"
|
|
input_tensors = {name: torch.from_numpy(array).to(device) for name, array in self.inputs.items()}
|
|
output_tensors = {name: torch.from_numpy(array).to(device) for name, array in self.outputs.items()}
|
|
return input_tensors, output_tensors
|
|
|
|
@classmethod
|
|
def create_io_binding(cls, sess, input_tensors, output_tensors):
|
|
io_binding = sess.io_binding()
|
|
for name, tensor in input_tensors.items():
|
|
io_binding.bind_input(name, tensor.device.type, 0, numpy_type(tensor.dtype), tensor.shape, tensor.data_ptr())
|
|
for name, tensor in output_tensors.items():
|
|
io_binding.bind_output(name, tensor.device.type, 0, numpy_type(tensor.dtype), tensor.shape, tensor.data_ptr())
|
|
return io_binding
|
|
|
|
def create_session(self):
|
|
sess_opt = ort.SessionOptions()
|
|
sess_opt.enable_profiling = self.profiling
|
|
if self.provider == "rocm":
|
|
execution_provider = ["ROCMExecutionProvider"]
|
|
elif self.provider == "cuda":
|
|
execution_provider = ["CUDAExecutionProvider"]
|
|
else:
|
|
raise ValueError(f"The script doesn't support provider type '{self.provider}' yet.")
|
|
|
|
sess = ort.InferenceSession(self.model, sess_options=sess_opt, providers=execution_provider)
|
|
|
|
if self.provider == "rocm":
|
|
assert 'ROCMExecutionProvider' in sess.get_providers()
|
|
elif self.provider == "cuda":
|
|
assert 'CUDAExecutionProvider' in sess.get_providers()
|
|
|
|
return sess
|
|
|
|
def benchmark(self):
|
|
sess = self.create_session()
|
|
input_tensors, output_tensors = self.create_input_output_tensors()
|
|
io_binding = self.create_io_binding(sess, input_tensors, output_tensors)
|
|
|
|
# warm up
|
|
for iter in range(10):
|
|
sess.run_with_iobinding(io_binding)
|
|
|
|
# measure
|
|
max_iters = 100
|
|
start_time = time.time()
|
|
for iter in range(max_iters):
|
|
sess.run_with_iobinding(io_binding)
|
|
|
|
# time is in milliseconds
|
|
elapsed_time = (time.time() - start_time) * 1000 / max_iters
|
|
return elapsed_time
|
|
|
|
|
|
class BenchmarkOp(ABC):
|
|
def __init__(self, args):
|
|
self.args = args
|
|
self.cases = []
|
|
|
|
@classmethod
|
|
@abstractmethod
|
|
def create_inputs_outputs(cls, op_param):
|
|
...
|
|
|
|
def add_case(self, op_param, model):
|
|
self.cases += [(op_param, model)]
|
|
|
|
@abstractmethod
|
|
def create_cases(self):
|
|
...
|
|
|
|
@classmethod
|
|
@abstractmethod
|
|
def case_profile(cls, op_param, time):
|
|
...
|
|
|
|
def benchmark(self):
|
|
self.create_cases()
|
|
for op_param, model in self.cases:
|
|
inputs, outputs = self.create_inputs_outputs(op_param)
|
|
bm = Benchmark(model, inputs, outputs, self.args)
|
|
time = bm.benchmark()
|
|
print(self.case_profile(op_param, time))
|