mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
### Description Changes in this PR: 1) use the optimized `makeMatMulPacked[Vec4]Source` to support matmul; 2) enable the conv2dByMatMul path; 3) support broadcasting; 4) use IndicesHelper. With profilingMode enabled on my ADL, MatMul with M = 512, K = 512, N = 512 drops from 15 ms to 2 ms.
319 lines
13 KiB
Text
319 lines
13 KiB
Text
[
|
|
{
|
|
"name": "matmul tests with no attributes",
|
|
"operator": "MatMul",
|
|
"attributes": [],
|
|
"cases": [
|
|
{
|
|
"name": "multiplies 2D tensors [2,4]x[4,2]",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 1, 3, 2, 3, 1, 2],
|
|
"dims": [2, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [2, 1, 1, 2, 2, 3, 0, 4],
|
|
"dims": [4, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [6, 20, 9, 19],
|
|
"dims": [2, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 2D tensors [2,2]x[2,2]",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 3, 4],
|
|
"dims": [2, 2],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 2, 3, 4],
|
|
"dims": [2, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [7, 10, 15, 22],
|
|
"dims": [2, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 2D tensors [2,3]x[3,2]",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 3, 4, 5, 6],
|
|
"dims": [2, 3],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 2, 3, 4, 5, 6],
|
|
"dims": [3, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [22, 28, 49, 64],
|
|
"dims": [2, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 3D tensors",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16],
|
|
"dims": [3, 2, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [
|
|
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 25, 26, 27, 28, 29,
|
|
30, 31, 32, 33, 34, 35, 36
|
|
],
|
|
"dims": [3, 4, 3],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [190, 200, 210, 470, 496, 522, 422, 496, 570, 510, 600, 690, 1254, 1296, 1338, 1726, 1784, 1842],
|
|
"dims": [3, 2, 3],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 4D tensors",
|
|
"inputs": [
|
|
{
|
|
"data": [
|
|
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
|
|
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
|
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36
|
|
],
|
|
"dims": [2, 3, 4, 3],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [
|
|
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
|
|
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
|
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
|
|
37, 38, 39, 40, 41, 42, 43, 44, 45
|
|
],
|
|
"dims": [2, 3, 3, 5],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
5824, 5938, 6052, 6166, 6280, 6283, 6406, 6529, 6652, 6775, 6742, 6874, 7006, 7138, 7270, 7201, 7342,
|
|
7483, 7624, 7765, 9910, 10060, 10210, 10360, 10510, 10504, 10663, 10822, 10981, 11140, 11098, 11266,
|
|
11434, 11602, 11770, 11692, 11869, 12046, 12223, 12400, 15076, 15262, 15448, 15634, 15820, 15805, 16000,
|
|
16195, 16390, 16585, 16534, 16738, 16942, 17146, 17350, 17263, 17476, 17689, 17902, 18115, 46, 52, 58, 64,
|
|
70, 100, 115, 130, 145, 160, 154, 178, 202, 226, 250, 208, 241, 274, 307, 340, 892, 934, 976, 1018, 1060,
|
|
1081, 1132, 1183, 1234, 1285, 1270, 1330, 1390, 1450, 1510, 1459, 1528, 1597, 1666, 1735, 2818, 2896,
|
|
2974, 3052, 3130, 3142, 3229, 3316, 3403, 3490, 3466, 3562, 3658, 3754, 3850, 3790, 3895, 4000, 4105, 4210
|
|
],
|
|
"dims": [2, 3, 4, 5],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 2D broadcasted to 5D tensors",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 1, 3, 2, 3, 1, 2],
|
|
"dims": [2, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [
|
|
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
|
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
|
|
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
|
122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
|
|
143, 144, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
|
|
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 145, 146, 147,
|
|
148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
|
|
169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
|
|
190, 191, 192
|
|
],
|
|
"dims": [4, 3, 2, 4, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
369, 376, 414, 422, 425, 432, 478, 486, 481, 488, 542, 550, 537, 544, 606, 614, 593, 600, 670, 678, 649,
|
|
656, 734, 742, 705, 712, 798, 806, 761, 768, 862, 870, 817, 824, 926, 934, 873, 880, 990, 998, 929, 936,
|
|
1054, 1062, 985, 992, 1118, 1126, 33, 40, 30, 38, 89, 96, 94, 102, 145, 152, 158, 166, 201, 208, 222, 230,
|
|
257, 264, 286, 294, 313, 320, 350, 358, 1041, 1048, 1182, 1190, 1097, 1104, 1246, 1254, 1153, 1160, 1310,
|
|
1318, 1209, 1216, 1374, 1382, 1265, 1272, 1438, 1446, 1321, 1328, 1502, 1510
|
|
],
|
|
"dims": [4, 3, 2, 2, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 4D broadcasted to 5D tensors",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16],
|
|
"dims": [3, 1, 2, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [
|
|
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
|
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
|
|
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
|
122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
|
|
143, 144, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
|
|
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 145, 146, 147,
|
|
148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
|
|
169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
|
|
190, 191, 192
|
|
],
|
|
"dims": [4, 3, 2, 4, 2],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
530, 540, 1362, 1388, 610, 620, 1570, 1596, 5042, 5116, 6130, 6220, 5634, 5708, 6850, 6940, 3538, 3580,
|
|
4882, 4940, 3874, 3916, 5346, 5404, 1010, 1020, 2610, 2636, 1090, 1100, 2818, 2844, 8594, 8668, 10450,
|
|
10540, 9186, 9260, 11170, 11260, 5554, 5596, 7666, 7724, 5890, 5932, 8130, 8188, 50, 60, 114, 140, 130,
|
|
140, 322, 348, 1490, 1564, 1810, 1900, 2082, 2156, 2530, 2620, 1522, 1564, 2098, 2156, 1858, 1900, 2562,
|
|
2620, 1490, 1500, 3858, 3884, 1570, 1580, 4066, 4092, 12146, 12220, 14770, 14860, 12738, 12812, 15490,
|
|
15580, 7570, 7612, 10450, 10508, 7906, 7948, 10914, 10972
|
|
],
|
|
"dims": [4, 3, 2, 2, 2],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 5D with 2D tensors",
|
|
"inputs": [
|
|
{
|
|
"data": [
|
|
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
|
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
|
|
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
|
64, 65, 66, 67, 68, 69, 70, 71, 72
|
|
],
|
|
"dims": [5, 3, 1, 2, 3],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
"dims": [3, 5],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
1342, 1564, 1786, 2008, 2230, 1396, 1627, 1858, 2089, 2320, 1450, 1690, 1930, 2170, 2410, 1504, 1753,
|
|
2002, 2251, 2500, 1558, 1816, 2074, 2332, 2590, 1612, 1879, 2146, 2413, 2680, 46, 52, 58, 64, 70, 100,
|
|
115, 130, 145, 160, 154, 178, 202, 226, 250, 208, 241, 274, 307, 340, 262, 304, 346, 388, 430, 316, 367,
|
|
418, 469, 520, 370, 430, 490, 550, 610, 424, 493, 562, 631, 700, 478, 556, 634, 712, 790, 532, 619, 706,
|
|
793, 880, 586, 682, 778, 874, 970, 640, 745, 850, 955, 1060, 694, 808, 922, 1036, 1150, 748, 871, 994,
|
|
1117, 1240, 802, 934, 1066, 1198, 1330, 856, 997, 1138, 1279, 1420, 910, 1060, 1210, 1360, 1510, 964,
|
|
1123, 1282, 1441, 1600, 1018, 1186, 1354, 1522, 1690, 1072, 1249, 1426, 1603, 1780, 1126, 1312, 1498,
|
|
1684, 1870, 1180, 1375, 1570, 1765, 1960, 1234, 1438, 1642, 1846, 2050, 1288, 1501, 1714, 1927, 2140
|
|
],
|
|
"dims": [5, 3, 1, 2, 5],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 2D with 4D tensors vec4",
|
|
"inputs": [
|
|
{
|
|
"data": [1, 2, 1, 3, 2, 3, 1, 2],
|
|
"dims": [2, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [
|
|
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
|
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
|
|
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
|
122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
|
|
30, 31
|
|
],
|
|
"dims": [3, 2, 4, 4],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
395, 402, 409, 416, 436, 444, 452, 460, 507, 514, 521, 528, 564, 572, 580, 588, 619, 626, 633, 640, 692,
|
|
700, 708, 716, 731, 738, 745, 752, 820, 828, 836, 844, 843, 850, 857, 864, 948, 956, 964, 972, 955, 962,
|
|
630, 637, 1076, 1084, 866, 874
|
|
],
|
|
"dims": [3, 2, 2, 4],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "multiplies 5D with 3D tensors vec4",
|
|
"inputs": [
|
|
{
|
|
"data": [
|
|
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
|
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
|
|
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
|
122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
|
|
30, 31
|
|
],
|
|
"dims": [3, 1, 2, 4, 4],
|
|
"type": "float32"
|
|
},
|
|
{
|
|
"data": [1, 2, 1, 3, 2, 3, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8],
|
|
"dims": [1, 4, 4],
|
|
"type": "float32"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
460, 662, 616, 867, 496, 714, 664, 935, 532, 766, 712, 1003, 568, 818, 760, 1071, 604, 870, 808, 1139,
|
|
640, 922, 856, 1207, 676, 974, 904, 1275, 712, 1026, 952, 1343, 748, 1078, 1000, 1411, 784, 1130, 1048,
|
|
1479, 820, 1182, 1096, 1547, 856, 1234, 1144, 1615, 892, 1286, 1192, 1683, 928, 1338, 1240, 1751, 964,
|
|
1390, 1288, 1819, 1000, 1442, 1336, 1887, 1036, 1494, 1384, 1955, 1072, 1546, 1432, 2023, 1108, 1598,
|
|
1480, 2091, 1144, 1650, 1528, 2159, 1180, 1702, 1576, 2227, 1216, 1754, 1624, 2295, 1252, 1806, 1672,
|
|
2363, 610, 954, 590, 1075
|
|
],
|
|
"dims": [3, 1, 2, 4, 4],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|