From c70838cbbbbfbf3f3375984004162c1dc6a8a437 Mon Sep 17 00:00:00 2001 From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com> Date: Wed, 15 Mar 2023 11:07:17 +0800 Subject: [PATCH] [ROCm] add Conv, NhwcConv benchmark to microbench (#15017) Add Conv, NhwcConv benchmark to microbench. Related PR: https://github.com/microsoft/onnxruntime/pull/14982, https://github.com/microsoft/onnxruntime/pull/14980 --- .../python/tools/microbench/attention.py | 2 + onnxruntime/python/tools/microbench/cast.py | 2 + onnxruntime/python/tools/microbench/conv.py | 62 ++++++++++++++++++ .../python/tools/microbench/fast_gelu.py | 2 + onnxruntime/python/tools/microbench/matmul.py | 2 + .../tools/microbench/models/conv_fp16.onnx | Bin 0 -> 315 bytes .../tools/microbench/models/conv_fp32.onnx | Bin 0 -> 315 bytes .../microbench/models/nhwcConv_fp16.onnx | Bin 0 -> 342 bytes .../microbench/models/nhwcConv_fp32.onnx | Bin 0 -> 342 bytes .../python/tools/microbench/nhwcConv.py | 62 ++++++++++++++++++ .../tools/microbench/skip_layer_norm.py | 2 + 11 files changed, 134 insertions(+) create mode 100644 onnxruntime/python/tools/microbench/conv.py create mode 100644 onnxruntime/python/tools/microbench/models/conv_fp16.onnx create mode 100644 onnxruntime/python/tools/microbench/models/conv_fp32.onnx create mode 100644 onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx create mode 100644 onnxruntime/python/tools/microbench/models/nhwcConv_fp32.onnx create mode 100644 onnxruntime/python/tools/microbench/nhwcConv.py diff --git a/onnxruntime/python/tools/microbench/attention.py b/onnxruntime/python/tools/microbench/attention.py index dc8291309f..285b42b7cb 100644 --- a/onnxruntime/python/tools/microbench/attention.py +++ b/onnxruntime/python/tools/microbench/attention.py @@ -23,6 +23,7 @@ class BenchmarkAttention(BenchmarkOp): def __init__(self, args): BenchmarkOp.__init__(self, args) + @classmethod def create_inputs_outputs(cls, op_param): np.random.seed(0) input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype( @@ -50,6 +51,7 @@ class BenchmarkAttention(BenchmarkOp): op_param = OpParam(1, 384, 768, 768 * 3, data_type) self.add_case(op_param, model) + @classmethod def case_profile(cls, op_param, time): profile = f"(batch_size seq_len length) = ({op_param.batch_size} {op_param.seq_len} {op_param.length}), {time:7.4f} ms" return profile diff --git a/onnxruntime/python/tools/microbench/cast.py b/onnxruntime/python/tools/microbench/cast.py index 86219a99ac..968338b080 100644 --- a/onnxruntime/python/tools/microbench/cast.py +++ b/onnxruntime/python/tools/microbench/cast.py @@ -30,6 +30,7 @@ class BenchmarkCast(BenchmarkOp): def __init__(self, args): BenchmarkOp.__init__(self, args) + @classmethod def create_inputs_outputs(cls, op_param): np.random.seed(0) input_data = np.random.rand(op_param.x, op_param.y, op_param.m, op_param.n).astype(op_param.input_data_type) @@ -89,6 +90,7 @@ class BenchmarkCast(BenchmarkOp): model_param = ModelParam(32, 1024) self.add_model_cases(model_param, model, input_data_type, output_data_type) + @classmethod def case_profile(cls, op_param, time): profile = f"(x y m n input_data_type) = ({op_param.x} {op_param.y} {op_param.m} {op_param.n} {op_param.input_data_type}), {time:7.4f} ms" return profile diff --git a/onnxruntime/python/tools/microbench/conv.py b/onnxruntime/python/tools/microbench/conv.py new file mode 100644 index 0000000000..d3a4c6867f --- /dev/null +++ b/onnxruntime/python/tools/microbench/conv.py @@ -0,0 +1,62 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import argparse +from dataclasses import dataclass + +import numpy as np +from benchmark import BenchmarkOp, add_arguments + + +@dataclass +class OpParam: + n: int + cout: int + cin: int + h: int + w: int + + data_type: type + + +class BenchmarkConv(BenchmarkOp): + def __init__(self, args): + BenchmarkOp.__init__(self, args) + + @classmethod + def create_inputs_outputs(cls, op_param): + np.random.seed(0) + input_data = np.random.rand(op_param.n, op_param.cin, op_param.h, op_param.w).astype(op_param.data_type) + weight = np.random.rand(op_param.cout, op_param.cin, 3, 3).astype(op_param.data_type) + bias = np.random.rand(op_param.cout).astype(op_param.data_type) + output = np.random.rand(op_param.n, op_param.cout, op_param.h, op_param.w).astype(op_param.data_type) + inputs = {"input": input_data, "weight": weight, "bias": bias} + outputs = {"conv": output} + return inputs, outputs + + def create_cases(self): + # attributes of model : kernel_shape(3,3), group(1), pads(1,1), strides(1,1), dilations(1,1) + model = "models/conv_fp16.onnx" if self.args.precision == "fp16" else "models/conv_fp32.onnx" + data_type = np.float16 if self.args.precision == "fp16" else np.float32 + + # change here to test your data shape + self.add_case(OpParam(2, 320, 320, 64, 64, data_type), model) + + @classmethod + def case_profile(cls, op_param, time): + profile = f"( n cout cin h w ) = ( {op_param.n} {op_param.cout} {op_param.cin} {op_param.h} {op_param.w} ), {time * 1000:7.4f} us" + return profile + + +def main(): + parser = argparse.ArgumentParser() + add_arguments(parser) + args = parser.parse_args() + bm = BenchmarkConv(args) + bm.benchmark() + + +if __name__ == "__main__": + main() diff --git a/onnxruntime/python/tools/microbench/fast_gelu.py b/onnxruntime/python/tools/microbench/fast_gelu.py index 82f86020a6..5c6cb764f2 100644 --- a/onnxruntime/python/tools/microbench/fast_gelu.py +++ b/onnxruntime/python/tools/microbench/fast_gelu.py @@ -30,6 +30,7 @@ class BenchmarkFastGelu(BenchmarkOp): def __init__(self, args): BenchmarkOp.__init__(self, args) + @classmethod def create_inputs_outputs(cls, op_param): np.random.seed(0) a = np.random.rand(op_param.dim1, op_param.dim2, op_param.dim3).astype(op_param.data_type) @@ -52,6 +53,7 @@ class BenchmarkFastGelu(BenchmarkOp): ) self.add_case(op_param, model) + @classmethod def case_profile(cls, op_param, time): profile = f"(dim1 dim2 dim3) = ({op_param.dim1} {op_param.dim2} {op_param.dim3}), {time:7.4f} ms" return profile diff --git a/onnxruntime/python/tools/microbench/matmul.py b/onnxruntime/python/tools/microbench/matmul.py index cdac59cbbf..2e40b55c91 100644 --- a/onnxruntime/python/tools/microbench/matmul.py +++ b/onnxruntime/python/tools/microbench/matmul.py @@ -34,6 +34,7 @@ class BenchmarkMatMul(BenchmarkOp): def __init__(self, args): BenchmarkOp.__init__(self, args) + @classmethod def create_inputs_outputs(cls, op_param): np.random.seed(0) a = np.random.rand(op_param.b1, op_param.b2, op_param.m, op_param.k).astype(op_param.data_type) @@ -85,6 +86,7 @@ class BenchmarkMatMul(BenchmarkOp): model_param = ModelParam(1, 384, 768, 768 * 4, 12, data_type) self.add_model_cases(model_param, model) + @classmethod def case_profile(cls, op_param, time): tflops = op_param.b1 * op_param.b2 * op_param.m * op_param.k * op_param.n * 2 / time / 1000000000 profile = f"(b1 b2 m k n) = ({op_param.b1} {op_param.b2} {op_param.m} {op_param.k} {op_param.n}), {time:7.4f} ms, {tflops:4.2f} tflops" diff --git a/onnxruntime/python/tools/microbench/models/conv_fp16.onnx b/onnxruntime/python/tools/microbench/models/conv_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ab72aabf7c04f8ecfa64b954b6df4c412070190d GIT binary patch literal 315 zcmX|6K~BRk5X7<5no&ThB?>BVL8_2)-~pVX_7}OJG%<}uYe%t5U%)evcq)z+_pqZ~ zX=Z1J^Y`BbKLn}L{mEf=mTKQQu$`){V2#n=E4KCc60i`sZq=c7%4l1XlKhc*ks;j= z=F~s5U5>eRgKA~`B!Wk^p2CPHtT@CHR1l3+ z&_?NT??yORuQ9B6#-r!mV>Sl%B_?XZDg0E+sm(1iPveocTO;(l$b(J%aq-zOT;Z literal 0 HcmV?d00001 diff --git a/onnxruntime/python/tools/microbench/models/conv_fp32.onnx b/onnxruntime/python/tools/microbench/models/conv_fp32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3f9692c433f799b7f86c39956640ea5b58055e18 GIT binary patch literal 315 zcmX|6K~BRk5X7<5no&ThB?>BVL8_2)-~pVX_7}OJG%<}uYe%t5U%)evcq)z+_pqZ~ zX=Z1J^Y`BbKLn}L{mEf=mTKQQu$`){V2#n=E4KCc60i`sZq=c7%4l1XlKhc*ks;j= z=F~s5U5>eRgKA~`B!Wk^p2CPHtT+)%P(d_O zK^vvVy&K_Ny~eQO8IPWKkJ%X5mzbytr|?rPr#839JdH=%ZjI3IA`dq4$HiyEaFu79 S1#TL1d^@UUFxGs!ocsfp$V3nT literal 0 HcmV?d00001 diff --git a/onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx b/onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d5e57160aa018caa4e09dad8bdb823c628cf2089 GIT binary patch literal 342 zcmZ9HK~BRk5JhpEwB{?I)DkT!utBPjvfuz+MeUvgbc0gIZ7fv+-k+Jr*!m~H7eS{5>FLY;D_jJ6rc z$Tyj$F`{E_F4evN1?e5)4_Rw@dbed>$&6;Sg=Pq=yy!ARF2U5gS`~7bw7wDM{CZZR zHr5;*AJ^_y{hj;~O9nm%v+-k+Jr*!m~H7eS{5>FLY;D_jJ6rc z$Tyj$F`{E_F4evN1?e5)4_Rw@dbed>$&6;Sg=Pq=yy!ARF2U5gS`~7bw7wDM{CZZR zHr5;*AJ^_y{hj;~OGfw{loQ>SvMmiFPKDCDm%)QS;lXnYOj;N37cknr9-Q~0w}?H) a25i4Vr#{C2t?8$2LR=;BW`@7_Cgc~0-A;%A literal 0 HcmV?d00001 diff --git a/onnxruntime/python/tools/microbench/nhwcConv.py b/onnxruntime/python/tools/microbench/nhwcConv.py new file mode 100644 index 0000000000..502a292614 --- /dev/null +++ b/onnxruntime/python/tools/microbench/nhwcConv.py @@ -0,0 +1,62 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +import argparse +from dataclasses import dataclass + +import numpy as np +from benchmark import BenchmarkOp, add_arguments + + +@dataclass +class OpParam: + n: int + cout: int + cin: int + h: int + w: int + + data_type: type + + +class BenchmarkNhwcConv(BenchmarkOp): + def __init__(self, args): + BenchmarkOp.__init__(self, args) + + @classmethod + def create_inputs_outputs(cls, op_param): + np.random.seed(0) + input_data = np.random.rand(op_param.n, op_param.h, op_param.w, op_param.cin).astype(op_param.data_type) + weight = np.random.rand(op_param.cout, 3, 3, op_param.cin).astype(op_param.data_type) + bias = np.random.rand(op_param.cout).astype(op_param.data_type) + output = np.random.rand(op_param.n, op_param.h, op_param.w, op_param.cout).astype(op_param.data_type) + inputs = {"input": input_data, "weight": weight, "bias": bias} + outputs = {"conv": output} + return inputs, outputs + + def create_cases(self): + # attributes of model : kernel_shape(3,3), group(1), pads(1,1), strides(1,1), dilations(1,1) + model = "models/nhwcConv_fp16.onnx" if self.args.precision == "fp16" else "models/nhwcConv_fp32.onnx" + data_type = np.float16 if self.args.precision == "fp16" else np.float32 + + # change here to test your data shape + self.add_case(OpParam(2, 320, 320, 64, 64, data_type), model) + + @classmethod + def case_profile(cls, op_param, time): + profile = f"( n cout cin h w ) = ( {op_param.n} {op_param.cout} {op_param.cin} {op_param.h} {op_param.w} ), {time * 1000:7.4f} us" + return profile + + +def main(): + parser = argparse.ArgumentParser() + add_arguments(parser) + args = parser.parse_args() + bm = BenchmarkNhwcConv(args) + bm.benchmark() + + +if __name__ == "__main__": + main() diff --git a/onnxruntime/python/tools/microbench/skip_layer_norm.py b/onnxruntime/python/tools/microbench/skip_layer_norm.py index dbfda7ef30..1509f4ce92 100644 --- a/onnxruntime/python/tools/microbench/skip_layer_norm.py +++ b/onnxruntime/python/tools/microbench/skip_layer_norm.py @@ -22,6 +22,7 @@ class BenchmarkSkipLayerNorm(BenchmarkOp): def __init__(self, args): BenchmarkOp.__init__(self, args) + @classmethod def create_inputs_outputs(cls, op_param): np.random.seed(0) input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype( @@ -55,6 +56,7 @@ class BenchmarkSkipLayerNorm(BenchmarkOp): op_param = OpParam(1, 384, 1024, data_type) self.add_case(op_param, model) + @classmethod def case_profile(cls, op_param, time): profile = f"(batch seq_len hidden_size) = ({op_param.batch_size} {op_param.seq_len} {op_param.hidden_size}), {time:7.4f} ms" return profile