onnxruntime/js/web/test/data/ops/gather-block-quantized.jsonc
Satya Kumar Jandhyala af18824f43
[JS/WebGPU] Add GatherBlockQuantized op support (#21734)
### Description
Add GatherBlockQuantized operator to JSEP.



### Motivation and Context
Gemma model requires this.
2024-08-26 14:46:04 -07:00

257 lines
7.2 KiB
Text

[
{
  // Signed (int4) suite. Per the attributes below, slices are gathered along axis 0 and
  // quantization blocks of 16 elements run along axis 2, so every 16-element row of the
  // [2, 3, 16] data tensor has exactly one scale and one zero point ([2, 3, 1]).
  "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, signed input, block_size=16",
  "operator": "GatherBlockQuantized",
  "opset": {
    "domain": "com.microsoft",
    "version": 1
  },
  "attributes": [
    {
      "name": "block_size",
      "data": 16,
      "type": "int"
    },
    {
      "name": "gather_axis",
      "data": 0,
      "type": "int"
    },
    {
      "name": "quantize_axis",
      "data": 2,
      "type": "int"
    }
  ],
  "cases": [
    {
      // Index 1 selects the second [3, 16] slice of the data. Each expected value is
      // (data - zero) * scale using that slice's scales (2, 1, 2) and zero points (0, 1, -1).
      // The rank-4 indices shape [1, 1, 1, 1] replaces the gathered axis in the output shape.
      "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input",
      "inputs": [
        // data: quantized values, 6 rows of 16 elements
        {
          "data": [
            0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
            7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
            6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
          ],
          "dims": [2, 3, 16],
          "type": "int4"
        },
        // indices: positions to gather along gather_axis
        {
          "data": [1],
          "dims": [1, 1, 1, 1],
          "type": "int32"
        },
        // scale: one value per 16-element block
        {
          "data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
          "dims": [2, 3, 1],
          "type": "float32"
        },
        // zero: one zero point per 16-element block
        {
          "data": [1, 1, 0, 0, 1, -1],
          "dims": [2, 3, 1],
          "type": "int4"
        }
      ],
      "outputs": [
        // dequantized rows 3..5 of the data
        {
          "data": [
            4, 6, 8, 10, 12, 14, 0, 2, 2, 4, 6, 8, 10, 12, 14, 0, -1, 0, 1, 2, 3, 4, 5, 6, 6, -1, 0, 1, 2, 3, 4, 5,
            14, 16, 2, 4, 6, 8, 10, 12, 12, 14, 16, 2, 4, 6, 8, 10
          ],
          "dims": [1, 1, 1, 1, 3, 16],
          "type": "float32"
        }
      ]
    },
    {
      // Index -1 yields the same slice as index 1 here (the last of the two slices along
      // axis 0). Expected values are (data - zero) * scale with scales (1.5, 1.75, 2.0)
      // and zero points (3, 4, 5).
      "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input, negative indices",
      "inputs": [
        // data: quantized values, 6 rows of 16 elements
        {
          "data": [
            0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
            7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
            6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
          ],
          "dims": [2, 3, 16],
          "type": "int4"
        },
        // indices: a single negative position along gather_axis
        {
          "data": [-1],
          "dims": [1],
          "type": "int32"
        },
        // scale: one value per 16-element block
        {
          "data": [0.5, 1.0, 1.25, 1.5, 1.75, 2.0],
          "dims": [2, 3, 1],
          "type": "float32"
        },
        // zero: one zero point per 16-element block
        {
          "data": [0, 1, 2, 3, 4, 5],
          "dims": [2, 3, 1],
          "type": "int4"
        }
      ],
      "outputs": [
        // dequantized rows 3..5 of the data
        {
          "data": [
            -1.5, 0, 1.5, 3, 4.5, 6, -4.5, -3, -3, -1.5, 0, 1.5, 3, 4.5, 6, -4.5, -7, -5.25, -3.5, -1.75, 0, 1.75,
            3.5, 5.25, 5.25, -7, -5.25, -3.5, -1.75, 0, 1.75, 3.5, 2, 4, -10, -8, -6, -4, -2, 0, 0, 2, 4, -10, -8, -6,
            -4, -2
          ],
          "dims": [1, 3, 16],
          "type": "float32"
        }
      ]
    }
  ]
},
{
  // Unsigned (uint4) suite. Per the attributes below, slices are gathered along axis 0 and
  // quantization blocks of 16 elements run along axis 2, so every 16-element row of the
  // [2, 3, 16] data tensor has exactly one scale and one zero point ([2, 3, 1]).
  "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, unsigned input, block_size=16",
  "operator": "GatherBlockQuantized",
  "opset": {
    "domain": "com.microsoft",
    "version": 1
  },
  "attributes": [
    {
      "name": "block_size",
      "data": 16,
      "type": "int"
    },
    {
      "name": "gather_axis",
      "data": 0,
      "type": "int"
    },
    {
      "name": "quantize_axis",
      "data": 2,
      "type": "int"
    }
  ],
  "cases": [
    {
      // Index 1 selects the second [3, 16] slice of the data. Each expected value is
      // (data - zero) * scale using that slice's scales (2, 1, 2) and zero points (0, 1, 1),
      // so outputs can be negative even though the quantized data is unsigned.
      "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, unsigned input",
      "inputs": [
        // data: quantized values, 6 rows of 16 elements
        {
          "data": [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            14, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
            11, 12, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10
          ],
          "dims": [2, 3, 16],
          "type": "uint4"
        },
        // indices: positions to gather along gather_axis
        {
          "data": [1],
          "dims": [1],
          "type": "int32"
        },
        // scale: one value per 16-element block
        {
          "data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
          "dims": [2, 3, 1],
          "type": "float32"
        },
        // zero: one zero point per 16-element block
        {
          "data": [1, 1, 0, 0, 1, 1],
          "dims": [2, 3, 1],
          "type": "uint4"
        }
      ],
      "outputs": [
        // dequantized rows 3..5 of the data
        {
          "data": [
            26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 11, 12, 13, 14, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 20, 22, 24, 26, 28, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18
          ],
          "dims": [1, 3, 16],
          "type": "float32"
        }
      ]
    }
  ]
},
{
  // Signed (int4) suite with an indices tensor of rank > 1. Per the attributes below,
  // slices are gathered along axis 0 and quantization blocks of 16 elements run along
  // axis 2, so every 16-element row of the [2, 3, 16] data tensor has exactly one scale
  // and one zero point ([2, 3, 1]).
  "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, signed input, block_size=16; indices dim > 1",
  "operator": "GatherBlockQuantized",
  "opset": {
    "domain": "com.microsoft",
    "version": 1
  },
  "attributes": [
    {
      "name": "block_size",
      "data": 16,
      "type": "int"
    },
    {
      "name": "gather_axis",
      "data": 0,
      "type": "int"
    },
    {
      "name": "quantize_axis",
      "data": 2,
      "type": "int"
    }
  ],
  "cases": [
    {
      // Index 1 selects the second [3, 16] slice of the data. Each expected value is
      // (data - zero) * scale using that slice's scales (2, 1, 2) and zero points (0, 1, -1).
      // The indices shape [1, 1] replaces the gathered axis, giving output dims [1, 1, 3, 16].
      "name": "GatherBlockQuantized; quantize_axis=2, gather_axis=0, block_size=16, signed input; indices dim > 1",
      "inputs": [
        // data: quantized values, 6 rows of 16 elements
        {
          "data": [
            0, 1, 2, 3, 4, 5, 6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4, 4, 5, 6,
            7, 0, 1, 2, 3, 3, 4, 5, 6, 7, 0, 1, 2, 2, 3, 4, 5, 6, 7, 0, 1, 1, 2, 3, 4, 5, 6, 7, 0, 0, 1, 2, 3, 4, 5,
            6, 7, 7, 0, 1, 2, 3, 4, 5, 6, 6, 7, 0, 1, 2, 3, 4, 5, 5, 6, 7, 0, 1, 2, 3, 4
          ],
          "dims": [2, 3, 16],
          "type": "int4"
        },
        // indices: rank-2 tensor of positions to gather along gather_axis
        {
          "data": [1],
          "dims": [1, 1],
          "type": "int32"
        },
        // scale: one value per 16-element block
        {
          "data": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
          "dims": [2, 3, 1],
          "type": "float32"
        },
        // zero: one zero point per 16-element block
        {
          "data": [1, 1, 0, 0, 1, -1],
          "dims": [2, 3, 1],
          "type": "int4"
        }
      ],
      "outputs": [
        // dequantized rows 3..5 of the data
        {
          "data": [
            4, 6, 8, 10, 12, 14, 0, 2, 2, 4, 6, 8, 10, 12, 14, 0, -1, 0, 1, 2, 3, 4, 5, 6, 6, -1, 0, 1, 2, 3, 4, 5,
            14, 16, 2, 4, 6, 8, 10, 12, 12, 14, 16, 2, 4, 6, 8, 10
          ],
          "dims": [1, 1, 3, 16],
          "type": "float32"
        }
      ]
    }
  ]
}
]