mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
### Description

<!-- Describe your changes. -->

Optimize conv1d by routing it through the conv2d path so that it benefits from conv2d's optimizations. With this change, the whisper-tiny-encoder model's run time drops from 532.28 ms to 158.66 ms: Conv now goes to Conv2DMatMul (8 ms) instead of GroupedConv (382 ms).

Old profiling result:

| Kernel | Time (ms) | Percentage (%) |
| -- | -- | -- |
| Conv\|GroupedConv | 382.99 | 71.95 |
| MatMul | 126.16 | 23.70 |
| Softmax | 7.01 | 1.32 |
| Transpose | 4.59 | 0.86 |
| Add | 4.39 | 0.82 |
| Mul | 2.36 | 0.44 |
| Div | 1.44 | 0.27 |
| ReduceMean\|ReduceMeanShared | 1.25 | 0.23 |
| Erf | 0.85 | 0.16 |
| Sub | 0.72 | 0.14 |
| Pow | 0.46 | 0.09 |
| Sqrt | 0.07 | 0.01 |
| Sum | 532.28 | |

New profiling result with this PR:

| Kernel | Time (ms) | Percentage (%) |
| -- | -- | -- |
| MatMul | 127.07 | 80.09 |
| Conv\|Conv2DMatMul | 8.00 | 5.04 |
| Softmax | 6.95 | 4.38 |
| Transpose | 4.65 | 2.93 |
| Add | 4.26 | 2.68 |
| Mul | 2.56 | 1.61 |
| Div | 1.51 | 0.95 |
| ReduceMean\|ReduceMeanShared | 1.31 | 0.83 |
| Erf | 0.85 | 0.54 |
| Sub | 0.79 | 0.50 |
| Pow | 0.46 | 0.29 |
| Conv\|Transpose | 0.26 | 0.17 |
| Sqrt | 0.00 | 0.00 |
| Sum | 158.66 | |

---------

Co-authored-by: Yulong Wang <7679871+fs-eire@users.noreply.github.com>
69 lines
1.6 KiB
Text
69 lines
1.6 KiB
Text
[
|
|
{
|
|
"name": "conv 1D without bias addition A",
|
|
"operator": "Conv",
|
|
"inputShapeDefinitions": "rankOnly",
|
|
"opset": { "domain": "", "version": 17 },
|
|
"attributes": [{ "name": "kernel_shape", "data": [2], "type": "ints" }],
|
|
"cases": [
|
|
{
|
|
"name": "T[0]",
|
|
"inputs": [
|
|
{
|
|
"data": [10, 20, 30],
|
|
"dims": [1, 1, 3],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 2],
|
|
"dims": [1, 1, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [50, 80],
|
|
"dims": [1, 1, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "conv 1D with bias addition A",
|
|
"operator": "Conv",
|
|
"inputShapeDefinitions": "rankOnly",
|
|
"opset": { "domain": "", "version": 17 },
|
|
"attributes": [{ "name": "kernel_shape", "data": [2], "type": "ints" }],
|
|
"cases": [
|
|
{
|
|
"name": "T[0]",
|
|
"inputs": [
|
|
{
|
|
"data": [10, 20, 30, 40],
|
|
"dims": [1, 2, 2],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
|
"dims": [4, 2, 2],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [0.1, 0.2, 0.3, 0.4],
|
|
"dims": [4],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [100.1, 100.2, 100.3, 100.4],
|
|
"dims": [1, 4, 1],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|