mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
### Description
Add the GatherBlockQuantized operator to JSEP.

### Motivation and Context
The Gemma model requires this operator.
257 lines
7.2 KiB
Text
257 lines
7.2 KiB
Text
[
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, signed input, block_size=16",
|
|
"operator": "GatherBlockQuantized",
|
|
"opset": {
|
|
"domain": "com.microsoft",
|
|
"version": 1
|
|
},
|
|
"attributes": [
|
|
{
|
|
"name": "block_size",
|
|
"data": 16,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "gather_axis",
|
|
"data": 0,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "quantize_axis",
|
|
"data": 2,
|
|
"type": "int"
|
|
}
|
|
],
|
|
"cases": [
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input",
|
|
"inputs": [
|
|
// data
|
|
{
|
|
"data": [
|
|
0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
|
|
7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
|
|
6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
|
|
],
|
|
"dims": [2, 3, 16],
|
|
"type": "int4"
|
|
},
|
|
// indices
|
|
{
|
|
"data": [1],
|
|
"dims": [1, 1, 1, 1],
|
|
"type": "int32"
|
|
},
|
|
// scale
|
|
{
|
|
"data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
|
|
"dims": [2, 3, 1],
|
|
"type": "float32"
|
|
},
|
|
// zero
|
|
{
|
|
"data": [1, 1, 0, 0, 1, -1],
|
|
"dims": [2, 3, 1],
|
|
"type": "int4"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
4, 6, 8, 10, 12, 14, 0, 2, 2, 4, 6, 8, 10, 12, 14, 0, -1, 0, 1, 2, 3, 4, 5, 6, 6, -1, 0, 1, 2, 3, 4, 5,
|
|
14, 16, 2, 4, 6, 8, 10, 12, 12, 14, 16, 2, 4, 6, 8, 10
|
|
],
|
|
"dims": [1, 1, 1, 1, 3, 16],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input, negative indices",
|
|
"inputs": [
|
|
// data
|
|
{
|
|
"data": [
|
|
0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
|
|
7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
|
|
6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
|
|
],
|
|
"dims": [2, 3, 16],
|
|
"type": "int4"
|
|
},
|
|
// indices
|
|
{
|
|
"data": [-1],
|
|
"dims": [1],
|
|
"type": "int32"
|
|
},
|
|
// scale
|
|
{
|
|
"data": [0.5, 1.0, 1.25, 1.5, 1.75, 2.0],
|
|
"dims": [2, 3, 1],
|
|
"type": "float32"
|
|
},
|
|
// zero
|
|
{
|
|
"data": [0, 1, 2, 3, 4, 5],
|
|
"dims": [2, 3, 1],
|
|
"type": "int4"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
-1.5, 0, 1.5, 3, 4.5, 6, -4.5, -3, -3, -1.5, 0, 1.5, 3, 4.5, 6, -4.5, -7, -5.25, -3.5, -1.75, 0, 1.75,
|
|
3.5, 5.25, 5.25, -7, -5.25, -3.5, -1.75, 0, 1.75, 3.5, 2, 4, -10, -8, -6, -4, -2, 0, 0, 2, 4, -10, -8, -6,
|
|
-4, -2
|
|
],
|
|
"dims": [1, 3, 16],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, unsigned input, block_size=16",
|
|
"operator": "GatherBlockQuantized",
|
|
"opset": {
|
|
"domain": "com.microsoft",
|
|
"version": 1
|
|
},
|
|
"attributes": [
|
|
{
|
|
"name": "block_size",
|
|
"data": 16,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "gather_axis",
|
|
"data": 0,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "quantize_axis",
|
|
"data": 2,
|
|
"type": "int"
|
|
}
|
|
],
|
|
"cases": [
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, unsigned input",
|
|
"inputs": [
|
|
// data
|
|
{
|
|
"data": [
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
|
|
14, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
|
11, 12, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7,
|
|
8, 9, 10
|
|
],
|
|
"dims": [2, 3, 16],
|
|
"type": "uint4"
|
|
},
|
|
// indices
|
|
{
|
|
"data": [1],
|
|
"dims": [1],
|
|
"type": "int32"
|
|
},
|
|
// scale
|
|
{
|
|
"data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
|
|
"dims": [2, 3, 1],
|
|
"type": "float32"
|
|
},
|
|
// zero
|
|
{
|
|
"data": [1, 1, 0, 0, 1, 1],
|
|
"dims": [2, 3, 1],
|
|
"type": "uint4"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 11, 12, 13, 14, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
|
|
9, 10, 20, 22, 24, 26, 28, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18
|
|
],
|
|
"dims": [1, 3, 16],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, signed input, block_size=16, indices rank > 1",
|
|
"operator": "GatherBlockQuantized",
|
|
"opset": {
|
|
"domain": "com.microsoft",
|
|
"version": 1
|
|
},
|
|
"attributes": [
|
|
{
|
|
"name": "block_size",
|
|
"data": 16,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "gather_axis",
|
|
"data": 0,
|
|
"type": "int"
|
|
},
|
|
{
|
|
"name": "quantize_axis",
|
|
"data": 2,
|
|
"type": "int"
|
|
}
|
|
],
|
|
"cases": [
|
|
{
|
|
"name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input; indices rank > 1",
|
|
"inputs": [
|
|
// data
|
|
{
|
|
"data": [
|
|
0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
|
|
7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
|
|
6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
|
|
],
|
|
"dims": [2, 3, 16],
|
|
"type": "int4"
|
|
},
|
|
// indices
|
|
{
|
|
"data": [1],
|
|
"dims": [1, 1],
|
|
"type": "int32"
|
|
},
|
|
// scale
|
|
{
|
|
"data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
|
|
"dims": [2, 3, 1],
|
|
"type": "float32"
|
|
},
|
|
// zero
|
|
{
|
|
"data": [1, 1, 0, 0, 1, -1],
|
|
"dims": [2, 3, 1],
|
|
"type": "int4"
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"data": [
|
|
4, 6, 8, 10, 12, 14, 0, 2, 2, 4, 6, 8, 10, 12, 14, 0, -1, 0, 1, 2, 3, 4, 5, 6, 6, -1, 0, 1, 2, 3, 4, 5,
|
|
14, 16, 2, 4, 6, 8, 10, 12, 12, 14, 16, 2, 4, 6, 8, 10
|
|
],
|
|
"dims": [1, 1, 3, 16],
|
|
"type": "float32"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|