From 9cbcc93e03b85bc4c935408942e39c428b3e63f3 Mon Sep 17 00:00:00 2001 From: zhangyaobit <1034716+zhangyaobit@users.noreply.github.com> Date: Wed, 9 Mar 2022 18:18:51 -0800 Subject: [PATCH] Add micro-benchmarks for Attention and SkipLayerNormalization ops. (#10798) * Add micro-benchmarks for Attention and SkipLayerNormalization ops. * Add choices for argument provider and precision. * Automatically select CUDA or ROCM execution provider. --- .../python/tools/microbench/attention.py | 52 +++++++++++++ .../python/tools/microbench/benchmark.py | 76 +++++++++++++------ .../microbench/models/attention_fp16.onnx | 40 ++++++++++ .../microbench/models/attention_fp32.onnx | 36 +++++++++ .../models/skip_layer_norm_fp16.onnx | 41 ++++++++++ .../models/skip_layer_norm_fp32.onnx | 35 +++++++++ .../tools/microbench/skip_layer_norm.py | 54 +++++++++++++ 7 files changed, 309 insertions(+), 25 deletions(-) create mode 100644 onnxruntime/python/tools/microbench/attention.py create mode 100644 onnxruntime/python/tools/microbench/models/attention_fp16.onnx create mode 100644 onnxruntime/python/tools/microbench/models/attention_fp32.onnx create mode 100644 onnxruntime/python/tools/microbench/models/skip_layer_norm_fp16.onnx create mode 100644 onnxruntime/python/tools/microbench/models/skip_layer_norm_fp32.onnx create mode 100644 onnxruntime/python/tools/microbench/skip_layer_norm.py diff --git a/onnxruntime/python/tools/microbench/attention.py b/onnxruntime/python/tools/microbench/attention.py new file mode 100644 index 0000000000..8e5e0df584 --- /dev/null +++ b/onnxruntime/python/tools/microbench/attention.py @@ -0,0 +1,52 @@ +import argparse +from dataclasses import dataclass +import numpy as np +from benchmark import BenchmarkOp, add_arguments + + +@dataclass +class OpParam: + batch_size: int + seq_len: int + hidden_size: int + length: int + data_type: type + + +class BenchmarkAttention(BenchmarkOp): + def __init__(self, args): + BenchmarkOp.__init__(self, args) + + def create_inputs_outputs(cls, op_param): + np.random.seed(0) + input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(op_param.data_type) + weight = np.random.rand(op_param.hidden_size, op_param.length).astype(op_param.data_type) + bias = np.random.rand(op_param.length).astype(op_param.data_type) + mask_index = np.random.rand(op_param.batch_size, op_param.seq_len).astype(np.int32) + output_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(op_param.data_type) + inputs = {"INPUT": input_data, "WEIGHT": weight, "BIAS": bias, "MASK_INDEX": mask_index} + outputs = {"return_val": output_data} + return inputs, outputs + + def create_cases(self): + model = "models/attention_fp16.onnx" if self.args.precision == "fp16" else "models/attention_fp32.onnx" + data_type = np.float16 if self.args.precision == "fp16" else np.float32 + # bert-base + op_param = OpParam(1, 384, 768, 768 * 3, data_type) + self.add_case(op_param, model) + + def case_profile(cls, op_param, time): + profile = f"(batch_size seq_len length) = ({op_param.batch_size} {op_param.seq_len} {op_param.length}), {time:7.4f} ms" + return profile + + +def main(): + parser = argparse.ArgumentParser() + add_arguments(parser) + args = parser.parse_args() + bm = BenchmarkAttention(args) + bm.benchmark() + + +if __name__ == "__main__": + main() diff --git a/onnxruntime/python/tools/microbench/benchmark.py b/onnxruntime/python/tools/microbench/benchmark.py index cb8e5f57c2..d5a658ed2e 100644 --- a/onnxruntime/python/tools/microbench/benchmark.py +++ b/onnxruntime/python/tools/microbench/benchmark.py @@ -1,63 +1,89 @@ from abc import ABC, abstractmethod from argparse import ArgumentParser -import time +import logging import numpy import onnxruntime as ort +import time import torch +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def numpy_type(torch_type): type_map = {torch.float32: numpy.float32, - torch.float16: numpy.float16} + torch.float16: numpy.float16, + torch.int32: numpy.int32} return type_map[torch_type] def add_arguments(parser: ArgumentParser): - parser.add_argument("--provider", required=False, type=str, default="rocm", help="Execution provider to use") - parser.add_argument("--precision", required=False, type=str, default="fp16", help="Number format to use") - parser.add_argument('--profiling', type=bool, default=False, help='If enable profiling') + parser.add_argument("--provider", required=False, type=str, + choices=["cuda", "rocm", "cpu", None], default=None, + help=("Execution provider to use. By default, a " + "provider is selected in the priority order " + "(cuda|rocm, cpu) depending on availability.")) + parser.add_argument("--precision", required=False, type=str, + choices=["fp16", "fp32"], default="fp16", + help="Number format to use") + parser.add_argument('--profiling', required=False, type=bool, + default=False, help='If enable profiling') + + +def provider_name(name): + provider_map = {"cuda": "CUDAExecutionProvider", + "rocm": "ROCMExecutionProvider", + "cpu": "CPUExecutionProvider"} + return provider_map[name] + + +def get_default_provider(): + if "CUDAExecutionProvider" in ort.get_available_providers(): + return "CUDAExecutionProvider" + if "ROCMExecutionProvider" in ort.get_available_providers(): + return "ROCMExecutionProvider" + return "CPUExecutionProvider" class Benchmark: def __init__(self, model, inputs, outputs, args): - self.provider = args.provider + self.provider = (get_default_provider() if args.provider == None + else provider_name(args.provider)) + logger.info(f"Execution provider: {self.provider}") self.profiling = args.profiling self.model = model + logger.info(f"Model: {self.model}") self.inputs = inputs self.outputs = outputs def create_input_output_tensors(self): - device = "cuda" - input_tensors = {name: torch.from_numpy(array).to(device) for name, array in self.inputs.items()} - output_tensors = {name: torch.from_numpy(array).to(device) for name, array in self.outputs.items()} + on_gpu = (self.provider == "CUDAExecutionProvider" + or self.provider == "ROCMExecutionProvider") + device = "cuda" if on_gpu else "cpu" + input_tensors = {name: torch.from_numpy(array).to(device) + for name, array in self.inputs.items()} + output_tensors = {name: torch.from_numpy(array).to(device) + for name, array in self.outputs.items()} return input_tensors, output_tensors @classmethod def create_io_binding(cls, sess, input_tensors, output_tensors): io_binding = sess.io_binding() for name, tensor in input_tensors.items(): - io_binding.bind_input(name, tensor.device.type, 0, numpy_type(tensor.dtype), tensor.shape, tensor.data_ptr()) + io_binding.bind_input(name, tensor.device.type, 0, + numpy_type(tensor.dtype), tensor.shape, + tensor.data_ptr()) for name, tensor in output_tensors.items(): - io_binding.bind_output(name, tensor.device.type, 0, numpy_type(tensor.dtype), tensor.shape, tensor.data_ptr()) + io_binding.bind_output(name, tensor.device.type, 0, + numpy_type(tensor.dtype), tensor.shape, + tensor.data_ptr()) return io_binding def create_session(self): sess_opt = ort.SessionOptions() sess_opt.enable_profiling = self.profiling - if self.provider == "rocm": - execution_provider = ["ROCMExecutionProvider"] - elif self.provider == "cuda": - execution_provider = ["CUDAExecutionProvider"] - else: - raise ValueError(f"The script doesn't support provider type '{self.provider}' yet.") - - sess = ort.InferenceSession(self.model, sess_options=sess_opt, providers=execution_provider) - - if self.provider == "rocm": - assert 'ROCMExecutionProvider' in sess.get_providers() - elif self.provider == "cuda": - assert 'CUDAExecutionProvider' in sess.get_providers() - + sess = ort.InferenceSession(self.model, sess_options=sess_opt, + providers=[self.provider]) return sess def benchmark(self): diff --git a/onnxruntime/python/tools/microbench/models/attention_fp16.onnx b/onnxruntime/python/tools/microbench/models/attention_fp16.onnx new file mode 100644 index 0000000000..33c426a72c --- /dev/null +++ b/onnxruntime/python/tools/microbench/models/attention_fp16.onnx @@ -0,0 +1,40 @@ +p2o:Ë +Y +INPUT +WEIGHT +BIAS + +MASK_INDEX +return_val" Attention* + num_heads  : com.microsoft attentionZ7 +INPUT. +, +( +  +batch_size + seq_len +  hidden_sizeZ& +WEIGHT + + +  hidden_size +lenZ +BIAS +  + +lenZ- + +MASK_INDEX + +  +batch_size + seq_lenb< + +return_val. +, +( +  +batch_size + seq_len +  hidden_sizeB + com.microsoft \ No newline at end of file diff --git a/onnxruntime/python/tools/microbench/models/attention_fp32.onnx b/onnxruntime/python/tools/microbench/models/attention_fp32.onnx new file mode 100644 index 0000000000..c263c9aa57 --- /dev/null +++ b/onnxruntime/python/tools/microbench/models/attention_fp32.onnx @@ -0,0 +1,36 @@ +p2o:Ë +Y +INPUT +WEIGHT +BIAS + +MASK_INDEX +return_val" Attention* + num_heads  : com.microsoft attentionZ7 +INPUT. +,( +  +batch_size + seq_len +  hidden_sizeZ& +WEIGHT + +  hidden_size +lenZ +BIAS +  +lenZ- + +MASK_INDEX + +  +batch_size + seq_lenb< + +return_val. +,( +  +batch_size + seq_len +  hidden_sizeB + com.microsoft \ No newline at end of file diff --git a/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp16.onnx b/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp16.onnx new file mode 100644 index 0000000000..a460a2b473 --- /dev/null +++ b/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp16.onnx @@ -0,0 +1,41 @@ +p2o:þ +f +INPUT +SKIP +GAMMA +BETA +BIAS +return_val"SkipLayerNormalization* +epsilonoƒ: : com.microsoftskip_layer_normalizationZ2 +INPUT) +' +# +batch + seq_len +  hidden_sizeZ1 +SKIP) +' +# +batch + seq_len +  hidden_sizeZ +GAMMA + + +  hidden_sizeZ +BETA + + +  hidden_sizeZ +BIAS + + +  hidden_sizeb5 + +return_val' +% +! +batch + seq_len +  inter_dimB + com.microsoft \ No newline at end of file diff --git a/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp32.onnx b/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp32.onnx new file mode 100644 index 0000000000..542f063d0a --- /dev/null +++ b/onnxruntime/python/tools/microbench/models/skip_layer_norm_fp32.onnx @@ -0,0 +1,35 @@ +p2o:þ +f +INPUT +SKIP +GAMMA +BETA +BIAS +return_val"SkipLayerNormalization* +epsilonoƒ: : com.microsoftskip_layer_normalizationZ2 +INPUT) +'# +batch + seq_len +  hidden_sizeZ1 +SKIP) +'# +batch + seq_len +  hidden_sizeZ +GAMMA + +  hidden_sizeZ +BETA + +  hidden_sizeZ +BIAS + +  hidden_sizeb5 + +return_val' +%! +batch + seq_len +  inter_dimB + com.microsoft \ No newline at end of file diff --git a/onnxruntime/python/tools/microbench/skip_layer_norm.py b/onnxruntime/python/tools/microbench/skip_layer_norm.py new file mode 100644 index 0000000000..16446d70ed --- /dev/null +++ b/onnxruntime/python/tools/microbench/skip_layer_norm.py @@ -0,0 +1,54 @@ +import argparse +from dataclasses import dataclass +import numpy as np +from benchmark import BenchmarkOp, add_arguments + + +@dataclass +class OpParam: + batch_size: int + seq_len: int + hidden_size: int + data_type: type + + +class BenchmarkSkipLayerNorm(BenchmarkOp): + def __init__(self, args): + BenchmarkOp.__init__(self, args) + + def create_inputs_outputs(cls, op_param): + np.random.seed(0) + input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(op_param.data_type) + skip = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(op_param.data_type) + gamma = np.random.rand(op_param.hidden_size).astype(op_param.data_type) + beta = np.random.rand(op_param.hidden_size).astype(op_param.data_type) + bias = np.random.rand(op_param.hidden_size).astype(op_param.data_type) + output_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(op_param.data_type) + + inputs = {"INPUT": input_data, "SKIP": skip, "GAMMA": gamma, "BETA": beta, "BIAS": bias} + outputs = {"return_val": output_data} + + return inputs, outputs + + def create_cases(self): + model = "models/skip_layer_norm_fp16.onnx" if self.args.precision == "fp16" else "models/skip_layer_norm_fp32.onnx" + data_type = np.float16 if self.args.precision == "fp16" else np.float32 + # bert-large + op_param = OpParam(1, 384, 1024, data_type) + self.add_case(op_param, model) + + def case_profile(cls, op_param, time): + profile = f"(batch seq_len hidden_size) = ({op_param.batch_size} {op_param.seq_len} {op_param.hidden_size}), {time:7.4f} ms" + return profile + + +def main(): + parser = argparse.ArgumentParser() + add_arguments(parser) + args = parser.parse_args() + bm = BenchmarkSkipLayerNorm(args) + bm.benchmark() + + +if __name__ == "__main__": + main()