2021-04-27 07:04:25 +00:00
|
|
|
[
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - no perm",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6],
|
|
|
|
|
"dims": [2, 3],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 4, 2, 5, 3, 6],
|
|
|
|
|
"dims": [3, 2],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[0, 1, 2]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [0, 1, 2], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[0, 2, 1]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [0, 2, 1], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12, 13, 17, 21, 14, 18, 22, 15, 19, 23, 16, 20, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 4, 3],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[2, 1, 0]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [2, 1, 0], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 13, 5, 17, 9, 21, 2, 14, 6, 18, 10, 22, 3, 15, 7, 19, 11, 23, 4, 16, 8, 20, 12, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [4, 3, 2],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[2, 0, 1]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [2, 0, 1], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [4, 2, 3],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[1, 2, 0]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [1, 2, 0], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [3, 4, 2],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[1, 0, 2]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [1, 0, 2], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[2,3,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-07-28 22:46:58 +00:00
|
|
|
"data": [1, 2, 3, 4, 13, 14, 15, 16, 5, 6, 7, 8, 17, 18, 19, 20, 9, 10, 11, 12, 21, 22, 23, 24],
|
2021-04-27 07:04:25 +00:00
|
|
|
"dims": [3, 2, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
[js/webgpu] Fix the transpose error when dims > 4D (#18027)
### Description
<!-- Describe your changes. -->
Currently, uniform support has bugs when the dims rank is larger than 4.
See https://github.com/microsoft/onnxruntime/issues/17860 item 1.
So this PR only enables shape uniforms for transpose when the shape rank
is <= 4. Otherwise, the compilation errors below are thrown:
```
1 error(s) generated while compiling the shader:
:3:50 error: uniform storage requires that array elements are aligned to 16 bytes, but array element of type 'u32' has a stride of 4 bytes. Consider using a vector or struct as the element type instead.
struct Uniforms { output_size:u32, a_shape:array<u32, 5>, a_strides:array<u32, 5>, output_shape:array<u32, 5>, output_strides:array<u32, 5> };
^^^^^^^^^^^^^
:3:7 note: see layout of struct:
/* align(4) size(84) */ struct Uniforms {
/* offset( 0) align(4) size( 4) */ output_size : u32;
/* offset( 4) align(4) size(20) */ a_shape : array<u32, 5>;
/* offset(24) align(4) size(20) */ a_strides : array<u32, 5>;
/* offset(44) align(4) size(20) */ output_shape : array<u32, 5>;
/* offset(64) align(4) size(20) */ output_strides : array<u32, 5>;
/* */ };
struct Uniforms { output_size:u32, a_shape:array<u32, 5>, a_strides:array<u32, 5>, output_shape:array<u32, 5>, output_strides:array<u32, 5> };
^^^^^^
:4:42 note: 'Uniforms' used in address space 'uniform' here
@group(0) @binding(2) var<uniform> uniforms: Uniforms;
^^^^^^^^
```
2023-10-23 18:02:19 +00:00
|
|
|
},
|
[js/webgpu] Optimize transpose (#21964)
### Description
<!-- Describe your changes. -->
Fix bugs in the previous implementation and route more cases to the
optimized path.
The following cases now take the optimized path:
1. 2D inputs or squeezed 2D inputs
2. Channels-last or channels-first transposes. For example, a channels-last
transpose: [1, 256, 512, 512] -> [1, 512, 512, 256]
In this case, the transpose becomes [256, 512x512] -> [512x512, 256]
### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
For the SD Turbo demo, the total transpose time drops from 122.09 ms to
39.98 ms, and the corresponding percentage drops from 11.05% to 3.89% in
this demo.
This PR will also help #21618: the total transpose time in that demo
drops from 70.25 ms to 17.32 ms on my iGPUs.
2024-09-04 19:04:04 +00:00
|
|
|
{
|
|
|
|
|
"name": "Transpose squeezed 2d - perms:[0, 2, 1, 3]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [0, 2, 1, 3], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[1, 3, 4, 1]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
|
|
|
|
|
"dims": [1, 3, 4, 1],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12],
|
|
|
|
|
"dims": [1, 4, 3, 1],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose 4D channelsFirst - perms:[0, 3, 1, 2]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [0, 3, 1, 2], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[1, 2, 3, 4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [1, 2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24],
|
|
|
|
|
"dims": [1, 4, 2, 3],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "Transpose 4D channelsLast - perms:[0, 2, 3, 1]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [0, 2, 3, 1], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[1, 2, 3, 4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [1, 2, 3, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24],
|
|
|
|
|
"dims": [1, 3, 4, 2],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
[js/webgpu] Fix the transpose error when dims > 4D (#18027)
### Description
<!-- Describe your changes. -->
Currently, uniform support has bugs when the dims rank is larger than 4.
See https://github.com/microsoft/onnxruntime/issues/17860 item 1.
So this PR only enables shape uniforms for transpose when the shape rank
is <= 4. Otherwise, the compilation errors below are thrown:
```
1 error(s) generated while compiling the shader:
:3:50 error: uniform storage requires that array elements are aligned to 16 bytes, but array element of type 'u32' has a stride of 4 bytes. Consider using a vector or struct as the element type instead.
struct Uniforms { output_size:u32, a_shape:array<u32, 5>, a_strides:array<u32, 5>, output_shape:array<u32, 5>, output_strides:array<u32, 5> };
^^^^^^^^^^^^^
:3:7 note: see layout of struct:
/* align(4) size(84) */ struct Uniforms {
/* offset( 0) align(4) size( 4) */ output_size : u32;
/* offset( 4) align(4) size(20) */ a_shape : array<u32, 5>;
/* offset(24) align(4) size(20) */ a_strides : array<u32, 5>;
/* offset(44) align(4) size(20) */ output_shape : array<u32, 5>;
/* offset(64) align(4) size(20) */ output_strides : array<u32, 5>;
/* */ };
struct Uniforms { output_size:u32, a_shape:array<u32, 5>, a_strides:array<u32, 5>, output_shape:array<u32, 5>, output_strides:array<u32, 5> };
^^^^^^
:4:42 note: 'Uniforms' used in address space 'uniform' here
@group(0) @binding(2) var<uniform> uniforms: Uniforms;
^^^^^^^^
```
2023-10-23 18:02:19 +00:00
|
|
|
{
|
|
|
|
|
"name": "Transpose 5D - perms:[4, 3, 1, 0, 2]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [4, 3, 1, 0, 2], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[3, 1, 2, 1, 4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [3, 1, 2, 1, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24],
|
|
|
|
|
"dims": [4, 1, 1, 3, 2],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
2024-05-22 15:15:44 +00:00
|
|
|
},
|
2024-11-18 20:52:48 +00:00
|
|
|
{
|
|
|
|
|
"name": "Transpose as reshape - perms:[1, 0, 2, 4, 3]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [1, 0, 2, 4, 3], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[3, 1, 2, 1, 4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [3, 1, 2, 1, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [1, 3, 2, 4, 1],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-05-22 15:15:44 +00:00
|
|
|
{
|
|
|
|
|
"name": "Transpose - perms:[1, 0]",
|
|
|
|
|
"operator": "Transpose",
|
|
|
|
|
"attributes": [{ "name": "perm", "data": [1, 0], "type": "ints" }],
|
|
|
|
|
"cases": [
|
|
|
|
|
{
|
|
|
|
|
"name": "T[6,4]",
|
|
|
|
|
"inputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
|
|
|
|
|
"dims": [6, 4],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": [1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24],
|
|
|
|
|
"dims": [4, 6],
|
|
|
|
|
"type": "float32"
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
]
|
2021-04-27 07:04:25 +00:00
|
|
|
}
|
|
|
|
|
]
|