From c70838cbbbbfbf3f3375984004162c1dc6a8a437 Mon Sep 17 00:00:00 2001
From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com>
Date: Wed, 15 Mar 2023 11:07:17 +0800
Subject: [PATCH] [ROCm] add Conv, NhwcConv benchmark to microbench (#15017)

Add Conv and NhwcConv benchmarks to microbench.

Related PRs: https://github.com/microsoft/onnxruntime/pull/14982,
https://github.com/microsoft/onnxruntime/pull/14980
---
 .../python/tools/microbench/attention.py      |   2 +
 onnxruntime/python/tools/microbench/cast.py   |   2 +
 onnxruntime/python/tools/microbench/conv.py   |  62 ++++++++++++++++++
 .../python/tools/microbench/fast_gelu.py      |   2 +
 onnxruntime/python/tools/microbench/matmul.py |   2 +
 .../tools/microbench/models/conv_fp16.onnx    | Bin 0 -> 315 bytes
 .../tools/microbench/models/conv_fp32.onnx    | Bin 0 -> 315 bytes
 .../microbench/models/nhwcConv_fp16.onnx      | Bin 0 -> 342 bytes
 .../microbench/models/nhwcConv_fp32.onnx      | Bin 0 -> 342 bytes
 .../python/tools/microbench/nhwcConv.py       |  62 ++++++++++++++++++
 .../tools/microbench/skip_layer_norm.py       |   2 +
 11 files changed, 134 insertions(+)
 create mode 100644 onnxruntime/python/tools/microbench/conv.py
 create mode 100644 onnxruntime/python/tools/microbench/models/conv_fp16.onnx
 create mode 100644 onnxruntime/python/tools/microbench/models/conv_fp32.onnx
 create mode 100644 onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx
 create mode 100644 onnxruntime/python/tools/microbench/models/nhwcConv_fp32.onnx
 create mode 100644 onnxruntime/python/tools/microbench/nhwcConv.py

diff --git a/onnxruntime/python/tools/microbench/attention.py b/onnxruntime/python/tools/microbench/attention.py
index dc8291309f..285b42b7cb 100644
--- a/onnxruntime/python/tools/microbench/attention.py
+++ b/onnxruntime/python/tools/microbench/attention.py
@@ -23,6 +23,7 @@ class BenchmarkAttention(BenchmarkOp):
     def __init__(self, args):
         BenchmarkOp.__init__(self, args)
 
+    @classmethod
     def create_inputs_outputs(cls, op_param):
         np.random.seed(0)
         input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(
@@ -50,6 +51,7 @@ class BenchmarkAttention(BenchmarkOp):
         op_param = OpParam(1, 384, 768, 768 * 3, data_type)
         self.add_case(op_param, model)
 
+    @classmethod
     def case_profile(cls, op_param, time):
         profile = f"(batch_size seq_len length) = ({op_param.batch_size} {op_param.seq_len} {op_param.length}), {time:7.4f} ms"
         return profile
diff --git a/onnxruntime/python/tools/microbench/cast.py b/onnxruntime/python/tools/microbench/cast.py
index 86219a99ac..968338b080 100644
--- a/onnxruntime/python/tools/microbench/cast.py
+++ b/onnxruntime/python/tools/microbench/cast.py
@@ -30,6 +30,7 @@ class BenchmarkCast(BenchmarkOp):
     def __init__(self, args):
         BenchmarkOp.__init__(self, args)
 
+    @classmethod
     def create_inputs_outputs(cls, op_param):
         np.random.seed(0)
         input_data = np.random.rand(op_param.x, op_param.y, op_param.m, op_param.n).astype(op_param.input_data_type)
@@ -89,6 +90,7 @@ class BenchmarkCast(BenchmarkOp):
         model_param = ModelParam(32, 1024)
         self.add_model_cases(model_param, model, input_data_type, output_data_type)
 
+    @classmethod
     def case_profile(cls, op_param, time):
         profile = f"(x y m n input_data_type) = ({op_param.x} {op_param.y} {op_param.m} {op_param.n} {op_param.input_data_type}), {time:7.4f} ms"
         return profile
diff --git a/onnxruntime/python/tools/microbench/conv.py b/onnxruntime/python/tools/microbench/conv.py
new file mode 100644
index 0000000000..d3a4c6867f
--- /dev/null
+++ b/onnxruntime/python/tools/microbench/conv.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import argparse
+from dataclasses import dataclass
+
+import numpy as np
+from benchmark import BenchmarkOp, add_arguments
+
+
+@dataclass
+class OpParam:
+    n: int
+    cout: int
+    cin: int
+    h: int
+    w: int
+
+    data_type: type
+
+
+class BenchmarkConv(BenchmarkOp):
+    def __init__(self, args):
+        BenchmarkOp.__init__(self, args)
+
+    @classmethod
+    def create_inputs_outputs(cls, op_param):
+        np.random.seed(0)
+        input_data = np.random.rand(op_param.n, op_param.cin, op_param.h, op_param.w).astype(op_param.data_type)
+        weight = np.random.rand(op_param.cout, op_param.cin, 3, 3).astype(op_param.data_type)
+        bias = np.random.rand(op_param.cout).astype(op_param.data_type)
+        output = np.random.rand(op_param.n, op_param.cout, op_param.h, op_param.w).astype(op_param.data_type)
+        inputs = {"input": input_data, "weight": weight, "bias": bias}
+        outputs = {"conv": output}
+        return inputs, outputs
+
+    def create_cases(self):
+        # attributes of model : kernel_shape(3,3), group(1), pads(1,1), strides(1,1), dilations(1,1)
+        model = "models/conv_fp16.onnx" if self.args.precision == "fp16" else "models/conv_fp32.onnx"
+        data_type = np.float16 if self.args.precision == "fp16" else np.float32
+
+        # change here to test your data shape
+        self.add_case(OpParam(2, 320, 320, 64, 64, data_type), model)
+
+    @classmethod
+    def case_profile(cls, op_param, time):
+        profile = f"( n cout cin h w ) = ( {op_param.n} {op_param.cout} {op_param.cin} {op_param.h} {op_param.w} ), {time * 1000:7.4f} us"
+        return profile
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    add_arguments(parser)
+    args = parser.parse_args()
+    bm = BenchmarkConv(args)
+    bm.benchmark()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onnxruntime/python/tools/microbench/fast_gelu.py b/onnxruntime/python/tools/microbench/fast_gelu.py
index 82f86020a6..5c6cb764f2 100644
--- a/onnxruntime/python/tools/microbench/fast_gelu.py
+++ b/onnxruntime/python/tools/microbench/fast_gelu.py
@@ -30,6 +30,7 @@ class BenchmarkFastGelu(BenchmarkOp):
     def __init__(self, args):
         BenchmarkOp.__init__(self, args)
 
+    @classmethod
     def create_inputs_outputs(cls, op_param):
         np.random.seed(0)
         a = np.random.rand(op_param.dim1, op_param.dim2, op_param.dim3).astype(op_param.data_type)
@@ -52,6 +53,7 @@ class BenchmarkFastGelu(BenchmarkOp):
         )
         self.add_case(op_param, model)
 
+    @classmethod
     def case_profile(cls, op_param, time):
         profile = f"(dim1 dim2 dim3) = ({op_param.dim1} {op_param.dim2} {op_param.dim3}), {time:7.4f} ms"
         return profile
diff --git a/onnxruntime/python/tools/microbench/matmul.py b/onnxruntime/python/tools/microbench/matmul.py
index cdac59cbbf..2e40b55c91 100644
--- a/onnxruntime/python/tools/microbench/matmul.py
+++ b/onnxruntime/python/tools/microbench/matmul.py
@@ -34,6 +34,7 @@ class BenchmarkMatMul(BenchmarkOp):
     def __init__(self, args):
         BenchmarkOp.__init__(self, args)
 
+    @classmethod
     def create_inputs_outputs(cls, op_param):
         np.random.seed(0)
         a = np.random.rand(op_param.b1, op_param.b2, op_param.m, op_param.k).astype(op_param.data_type)
@@ -85,6 +86,7 @@ class BenchmarkMatMul(BenchmarkOp):
         model_param = ModelParam(1, 384, 768, 768 * 4, 12, data_type)
         self.add_model_cases(model_param, model)
 
+    @classmethod
     def case_profile(cls, op_param, time):
         tflops = op_param.b1 * op_param.b2 * op_param.m * op_param.k * op_param.n * 2 / time / 1000000000
         profile = f"(b1 b2 m k n) = ({op_param.b1} {op_param.b2} {op_param.m} {op_param.k} {op_param.n}), {time:7.4f} ms, {tflops:4.2f} tflops"
diff --git a/onnxruntime/python/tools/microbench/models/conv_fp16.onnx b/onnxruntime/python/tools/microbench/models/conv_fp16.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ab72aabf7c04f8ecfa64b954b6df4c412070190d
GIT binary patch
literal 315
zcmX|6K~BRk5X7<5no&ThB?>BVL8_2)-~pVX_7}OJG%<}uYe%t5U%)evcq)z+_pqZ~
zX=Z1J^Y`BbKLn}L{mEf=mTKQQu$`){V2#n=E4KCc60i`sZq=c7%4l1XlKhc*ks;j=
z=F~s<D=F@feaS(~!-wtaUY4|^BWVHF*KN>5U5>eRgKA~`B!Wk^p2CPHtT@CHR1l3+
z&_?NT??yORuQ9B6#-r!mV>Sl%B_?XZDg0E+sm(1iPveocTO;(l$b(J%aq-zOT;<tj
Rft$u0-;Syoj5VJwC;z0PL^J>Z

literal 0
HcmV?d00001

diff --git a/onnxruntime/python/tools/microbench/models/conv_fp32.onnx b/onnxruntime/python/tools/microbench/models/conv_fp32.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3f9692c433f799b7f86c39956640ea5b58055e18
GIT binary patch
literal 315
zcmX|6K~BRk5X7<5no&ThB?>BVL8_2)-~pVX_7}OJG%<}uYe%t5U%)evcq)z+_pqZ~
zX=Z1J^Y`BbKLn}L{mEf=mTKQQu$`){V2#n=E4KCc60i`sZq=c7%4l1XlKhc*ks;j=
z=F~s<D=F@feaS(~!-wtaUY4|^BWVHF*KN>5U5>eRgKA~`B!Wk^p2CPHtT+)%P(d_O
zK^vvVy&K_Ny~eQO8IPWKkJ%X5mzbytr|?rPr#839JdH=%ZjI3IA`dq4$HiyEaFu79
S1#TL1d^@UUFxGs!ocsfp$V3nT

literal 0
HcmV?d00001

diff --git a/onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx b/onnxruntime/python/tools/microbench/models/nhwcConv_fp16.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d5e57160aa018caa4e09dad8bdb823c628cf2089
GIT binary patch
literal 342
zcmZ9HK~BRk5JhpEwB{?I)DkT!utBPjvfuz+MeUvgbc0gIZ7f<lid}LU794});Y3L!
zSa>v+-k+Jr*!m~H7eS<Sb#a(9QXNZ&aIbR9Lt*silD(CU*!u2y>{5>FLY;D_jJ6rc
z$Tyj$F`{E_F4evN1?e5)4_Rw@dbed>$&6;Sg=Pq=yy!ARF2U5gS`~7bw7wDM{CZZR
zHr5;*AJ^_y{hj;~O9nm%<wUooY)gZPQ=#<kW$@rnc<|f;lh(!i1&nsD2j{)$En<(c
a0o$+8sgLo0Yx-%M5LZdOnc?rf3Hb$<w@$18

literal 0
HcmV?d00001

diff --git a/onnxruntime/python/tools/microbench/models/nhwcConv_fp32.onnx b/onnxruntime/python/tools/microbench/models/nhwcConv_fp32.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e8a5dda2c1916b41c0e4f4e3a1972114c8478b04
GIT binary patch
literal 342
zcmZ9HK~BRk5JhpEwB{?I)DkT!utBPjvfuz+MeUvgbc0gIZ7f<lid}LU794});Y3L!
zSa>v+-k+Jr*!m~H7eS<Sb#a(9QXNZ&aIbR9Lt*silD(CU*!u2y>{5>FLY;D_jJ6rc
z$Tyj$F`{E_F4evN1?e5)4_Rw@dbed>$&6;Sg=Pq=yy!ARF2U5gS`~7bw7wDM{CZZR
zHr5;*AJ^_y{hj;~OGfw{loQ>SvMmiFPKDCDm%)QS;lXnYOj;N37cknr9-Q~0w}?H)
a25i4Vr#{C2t?8$2LR=;BW`@7_Cgc~0-A;%A

literal 0
HcmV?d00001

diff --git a/onnxruntime/python/tools/microbench/nhwcConv.py b/onnxruntime/python/tools/microbench/nhwcConv.py
new file mode 100644
index 0000000000..502a292614
--- /dev/null
+++ b/onnxruntime/python/tools/microbench/nhwcConv.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import argparse
+from dataclasses import dataclass
+
+import numpy as np
+from benchmark import BenchmarkOp, add_arguments
+
+
+@dataclass
+class OpParam:
+    n: int
+    cout: int
+    cin: int
+    h: int
+    w: int
+
+    data_type: type
+
+
+class BenchmarkNhwcConv(BenchmarkOp):
+    def __init__(self, args):
+        BenchmarkOp.__init__(self, args)
+
+    @classmethod
+    def create_inputs_outputs(cls, op_param):
+        np.random.seed(0)
+        input_data = np.random.rand(op_param.n, op_param.h, op_param.w, op_param.cin).astype(op_param.data_type)
+        weight = np.random.rand(op_param.cout, 3, 3, op_param.cin).astype(op_param.data_type)
+        bias = np.random.rand(op_param.cout).astype(op_param.data_type)
+        output = np.random.rand(op_param.n, op_param.h, op_param.w, op_param.cout).astype(op_param.data_type)
+        inputs = {"input": input_data, "weight": weight, "bias": bias}
+        outputs = {"conv": output}
+        return inputs, outputs
+
+    def create_cases(self):
+        # attributes of model : kernel_shape(3,3), group(1), pads(1,1), strides(1,1), dilations(1,1)
+        model = "models/nhwcConv_fp16.onnx" if self.args.precision == "fp16" else "models/nhwcConv_fp32.onnx"
+        data_type = np.float16 if self.args.precision == "fp16" else np.float32
+
+        # change here to test your data shape
+        self.add_case(OpParam(2, 320, 320, 64, 64, data_type), model)
+
+    @classmethod
+    def case_profile(cls, op_param, time):
+        profile = f"( n cout cin h w ) = ( {op_param.n} {op_param.cout} {op_param.cin} {op_param.h} {op_param.w} ), {time * 1000:7.4f} us"
+        return profile
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    add_arguments(parser)
+    args = parser.parse_args()
+    bm = BenchmarkNhwcConv(args)
+    bm.benchmark()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onnxruntime/python/tools/microbench/skip_layer_norm.py b/onnxruntime/python/tools/microbench/skip_layer_norm.py
index dbfda7ef30..1509f4ce92 100644
--- a/onnxruntime/python/tools/microbench/skip_layer_norm.py
+++ b/onnxruntime/python/tools/microbench/skip_layer_norm.py
@@ -22,6 +22,7 @@ class BenchmarkSkipLayerNorm(BenchmarkOp):
     def __init__(self, args):
         BenchmarkOp.__init__(self, args)
 
+    @classmethod
     def create_inputs_outputs(cls, op_param):
         np.random.seed(0)
         input_data = np.random.rand(op_param.batch_size, op_param.seq_len, op_param.hidden_size).astype(
@@ -55,6 +56,7 @@ class BenchmarkSkipLayerNorm(BenchmarkOp):
         op_param = OpParam(1, 384, 1024, data_type)
         self.add_case(op_param, model)
 
+    @classmethod
     def case_profile(cls, op_param, time):
         profile = f"(batch seq_len hidden_size) = ({op_param.batch_size} {op_param.seq_len} {op_param.hidden_size}), {time:7.4f} ms"
         return profile