onnxruntime/js/web/lib/wasm/jsep/webgpu/ops/quantize-linear.ts
Yulong Wang abdc31de40
[js] change default formatter for JavaScript/TypeScript from clang-format to Prettier (#21728)
### Description

See
454996d496
for manual changes (excluded auto-generated formatting changes)

### Why

Because the toolset for the old clang-format is out of date, which reduces
development efficiency.

- The NPM package `clang-format` is already in maintenance mode and has not
been updated in two years.
- The VSCode extension for clang-format has not been maintained for a while,
and a recent Node.js security update made it stop working entirely on
Windows.

No one in community seems interested in fixing those.

Choose Prettier as it is the most popular TS/JS formatter.

### How to merge

It's easy to break the build:
- Be careful of any new commits on main not included in this PR.
- Be careful that after this PR is merged, other PRs that already passed
CI can merge.

So, make sure there are no new commits before merging this one, and
invalidate JS PRs that have already passed CI, forcing them to merge with the latest main.
2024-08-14 16:51:22 -07:00

237 lines
11 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import { DataType } from '../../../wasm-common';
import { TensorView } from '../../tensor-view';
import { ShapeUtil } from '../../util';
import { AttributeWithCacheKey, createAttributeWithCacheKey } from '../attribute-with-cache-key';
import { ComputeContext, ProgramInfo, ProgramUniform } from '../types';
import {
createTensorShapeVariables,
getMaxComponents,
inputVariable,
outputVariable,
ShaderHelper,
UniformsArrayType,
} from './common';
/**
 * Attributes of the DequantizeLinear operator as consumed by this WebGPU kernel.
 * Extends AttributeWithCacheKey, whose cache key is used as the shader cache hint.
 */
export interface DequantizeLinerAttributes extends AttributeWithCacheKey {
// Axis attribute. It is normalized against the rank of the first input before being uploaded as the
// `axis` uniform of the shader.
axis: number;
// Block size. A value greater than 0 triggers the block-quantization checks in validateInputs; the value
// is uploaded as the `block_size` uniform of the shader.
blockSize: number;
}
/**
 * Validates the inputs and attributes of a DequantizeLinear node.
 *
 * The inputs are `[x, x_scale]` with an optional third `x_zero_point`.
 *
 * @param inputs - input tensors of the node.
 * @param attributes - parsed `axis` / `blockSize` attributes.
 * @throws Error when the input count, data types, shapes or block size are not acceptable.
 */
const validateInputs = (inputs: readonly TensorView[], attributes: DequantizeLinerAttributes): void => {
  if (inputs.length < 2 || inputs.length > 3) {
    throw new Error('DequantizeLinear requires 2 or 3 inputs.');
  }

  const x = inputs[0];
  const scale = inputs[1];
  const zeroPoint = inputs.length === 3 ? inputs[2] : undefined;
  const xRank = x.dims.length;
  const scaleRank = scale.dims.length;

  // The zero-point input type must be the same as the input data type.
  if (zeroPoint && x.dataType !== zeroPoint.dataType) {
    throw new Error('x and x-zero-point must have the same data type.');
  }
  if (x.dataType === DataType.int32 && zeroPoint) {
    throw new Error('In the case of dequantizing int32 there is no zero point.');
  }
  if (scaleRank !== 0 && scaleRank !== 1 && scaleRank !== xRank) {
    throw new Error('scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.');
  }

  // Scale and zero-point inputs must have the same shape. The shapes are compared element by element:
  // comparing the two `dims` arrays with `===` would only test object identity.
  if (zeroPoint) {
    const zeroPointDims = zeroPoint.dims;
    if (scaleRank !== zeroPointDims.length) {
      throw new Error('scale and zero-point inputs must have the same rank.');
    }
    if (!scale.dims.every((d, i) => d === zeroPointDims[i])) {
      throw new Error('scale and zero-point inputs must have the same shape.');
    }
  }

  // Validate blockSize
  if (attributes.blockSize > 0) {
    // Block quantization
    if (scaleRank === 0 || (scaleRank === 1 && scale.dims[0] === 1)) {
      throw new Error('blockSize must be set only for block quantization.');
    }
    // The scale rank must match the input rank; checked first so that the per-dimension comparison
    // below always compares dimensions at matching positions.
    if (scaleRank !== xRank) {
      throw new Error('For block qunatization the scale input rank must be the same as the x rank.');
    }
    // Resolve a negative axis before using it as an index into the shapes.
    const axis = ShapeUtil.normalizeAxis(attributes.axis, xRank);
    if (!scale.dims.every((d, i) => i === axis || d === x.dims[i])) {
      throw new Error('For block qunatization, scale input shape to match the input shape except for the axis');
    }
    const dI = x.dims[axis];
    const si = scale.dims[axis];
    if (attributes.blockSize < Math.ceil(dI / si) || attributes.blockSize > Math.ceil(dI / (si - 1) - 1)) {
      throw new Error('blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].');
    }
  }
};
/**
 * Creates the WebGPU program that writes `(x - zero_point) * scale` for every element of `x`.
 *
 * int8/uint8 inputs (and zero points) are bound as u32 words and unpacked in the shader with
 * `unpack4xI8` / `unpack4xU8`. The scale is read per tensor, per axis or per block depending on the
 * shape of the scale input.
 *
 * @param inputs - `[x, scale]` with an optional third zero-point tensor.
 * @param attributes - `axis` and `blockSize` attributes of the node.
 * @returns the program info (shader source generator, cache hint and run data).
 */
const createDequantizeLinearProgramInfo = (
  inputs: readonly TensorView[],
  attributes: DequantizeLinerAttributes,
): ProgramInfo => {
  const axis = ShapeUtil.normalizeAxis(attributes.axis, inputs[0].dims.length);
  const inputType = inputs[0].dataType;
  const isSigned = inputType === DataType.int8;
  const outputShape = inputs[0].dims; // output shape is same as the input shape
  const dataType = inputs[1].dataType; // output type is same as the scale input type
  const outputSize = ShapeUtil.size(outputShape);
  // 8-bit inputs are declared as a flat u32 array in which every word carries four values.
  const isPacked = inputType === DataType.int8 || inputType === DataType.uint8;
  const inputShape = isPacked ? [Math.ceil(ShapeUtil.size(inputs[0].dims) / 4)] : inputs[0].dims;
  const scaleShape = inputs[1].dims;
  const zeroPointInput = inputs.length > 2 ? inputs[2] : undefined;
  const zeroPointShape = zeroPointInput
    ? isPacked
      ? [Math.ceil(ShapeUtil.size(zeroPointInput.dims) / 4)]
      : zeroPointInput.dims
    : undefined;
  // Scales input is a scalar for per-tensor/per-layer quantization, 1-D tensor for per-axis quantization
  // or tensor with same rank as input for blocked quantization.
  const perLayerQuantization = scaleShape.length === 0 || (scaleShape.length === 1 && scaleShape[0] === 1);
  const perAxisQuantization = perLayerQuantization === false && scaleShape.length === 1;
  // Left unnecessary commented-out assignment for documentation
  // const blockQuantization = perLayerQuantization === false && perAxisQuantization === false;
  const maxComponents = getMaxComponents(outputSize);
  // Vectorized output is only used for per-layer quantization; for packed input it is only used when one
  // output vector covers exactly the four values of one packed word.
  const useComponents = perLayerQuantization && (!isPacked || maxComponents === 4);
  const components = useComponents ? maxComponents : 1;
  const inputComponent = useComponents && !isPacked ? maxComponents : 1;
  const input = inputVariable('input', isPacked ? DataType.uint32 : inputType, inputShape.length, inputComponent);
  const scale = inputVariable('scale', dataType, scaleShape.length);
  const zeroPoint = zeroPointShape
    ? inputVariable('zero_point', isPacked ? DataType.uint32 : inputType, zeroPointShape.length)
    : undefined;
  const output = outputVariable('output', dataType, outputShape.length, components);
  const inputVariables = [input, scale];
  if (zeroPoint) {
    inputVariables.push(zeroPoint);
  }
  const inputShapes = [inputShape, scaleShape];
  if (zeroPointShape) {
    inputShapes.push(zeroPointShape);
  }
  const programUniforms: ProgramUniform[] = [
    { type: DataType.uint32, data: outputSize / components },
    { type: DataType.uint32, data: axis },
    { type: DataType.uint32, data: attributes.blockSize },
    ...createTensorShapeVariables(...inputShapes, outputShape),
  ];
  const getShaderSource = (shaderHelper: ShaderHelper) => {
    const uniforms: UniformsArrayType = [
      { name: 'output_size', type: 'u32' },
      { name: 'axis', type: 'u32' },
      { name: 'block_size', type: 'u32' },
    ];
    return `
      ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)}
      ${shaderHelper.mainStart()}
          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
          let output_indices = ${output.offsetToIndices('global_idx')};

          // Set input x
          ${(() => {
            if (isPacked) {
              // With one value per invocation, element global_idx lives in packed word global_idx / 4.
              // With four values per invocation, invocation global_idx consumes the whole packed word
              // global_idx, so the offset must not be divided again.
              const wordOffset = components === 1 ? 'global_idx / 4' : 'global_idx';
              return `
            let input = ${input.getByOffset(wordOffset)};
            let x_vec = ${isSigned ? 'unpack4xI8(input)' : 'unpack4xU8(input)'};
            let x_value = ${components === 1 ? 'x_vec[global_idx % 4]' : 'x_vec'};`;
            } else {
              return `let x_value = ${input.getByOffset('global_idx')};`;
            }
          })()};

          // Set scale input
          ${(() => {
            if (perLayerQuantization) {
              // scale input is a scalar ()
              return `let scale_value= ${scale.getByOffset('0')}`;
            } else if (perAxisQuantization) {
              // scale input is a 1D tensor
              return `
            let scale_index = ${output.indicesGet('output_indices', 'uniforms.axis')};
            let scale_value= ${scale.getByOffset('scale_index')};`;
            } else {
              // Block quantization. Scale input rank is same as input/output rank.
              return `
            var scale_indices: ${scale.type.indices} = output_indices;
            let index = ${scale.indicesGet('scale_indices', 'uniforms.axis')} / uniforms.block_size;
            ${scale.indicesSet('scale_indices', 'uniforms.axis', 'index')};
            let scale_value= ${scale.getByIndices('scale_indices')};`;
            }
          })()};

          // Set zero-point input
          ${(() => {
            if (zeroPoint) {
              if (perLayerQuantization) {
                // zero-point input is a scalar
                if (isPacked) {
                  return `
                let zero_point_input = ${zeroPoint.getByOffset('0')};
                let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'};
                let zero_point_value= zero_point_vec[0]`;
                } else {
                  return `let zero_point_value = ${zeroPoint.getByOffset('0')}`;
                }
              } else if (perAxisQuantization) {
                // zero-point input is a 1D tensor
                if (isPacked) {
                  return `
                let zero_point_index = ${output.indicesGet('output_indices', 'uniforms.axis')};
                let zero_point_input = ${zeroPoint.getByOffset('zero_point_index / 4')};
                let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'};
                let zero_point_value = zero_point_vec[zero_point_index % 4]`;
                } else {
                  return `
                let zero_point_index = ${output.indicesGet('output_indices', 'uniforms.axis')};
                let zero_point_value = ${zeroPoint.getByOffset('zero_point_index')};`;
                }
              } else {
                // BlockedQuantization. The zero-point input shape is same as the input shape except along axis.
                if (isPacked) {
                  // The packed zero point is a flat u32 array, so the element offset is derived from the
                  // scale indices (scale and zero point share the same logical shape).
                  return `
                let zero_point_offset = ${scale.indicesToOffset('scale_indices')};
                let zero_point_input = ${zeroPoint.getByOffset('zero_point_offset / 4')};
                let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'};
                let zero_point_value = zero_point_vec[zero_point_offset % 4];`;
                } else {
                  return `let zero_point_value = ${zeroPoint.getByIndices('scale_indices')};`;
                }
              }
            } else {
              return `let zero_point_value = ${isPacked ? (isSigned ? 'i32' : 'u32') : input.type.value}(0);`;
            }
          })()};

      // Compute and write output
      // NOTE(review): for uint8 input both operands of the subtraction are u32, so it is evaluated in
      // unsigned arithmetic before the conversion to the output type - confirm the result when
      // x < zero_point.
      ${output.setByOffset('global_idx', `${output.type.value}(x_value - zero_point_value) * scale_value`)};
      }`;
  };
  return {
    name: 'DequantizeLinear',
    shaderCache: {
      // `components` and `perLayerQuantization` change the generated shader but are derived from concrete
      // sizes rather than ranks. Assuming 'rank' dependencies key the cache on input ranks only, they have
      // to be part of the hint so that differently generated shaders never share a cache entry.
      hint: `${attributes.cacheKey};${components};${perLayerQuantization}`,
      inputDependencies: zeroPoint ? ['rank', 'rank', 'rank'] : ['rank', 'rank'],
    },
    getShaderSource,
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType }],
      dispatchGroup: { x: Math.ceil(outputSize / components / 64), y: 1, z: 1 },
      programUniforms,
    }),
  };
};
/**
 * Entry point of the DequantizeLinear kernel: checks the inputs of the compute context against the
 * attributes, then builds the program info and submits it to the context.
 *
 * @param context - compute context providing the input tensors and the `compute` method.
 * @param attributes - parsed attributes of the node.
 */
export const dequantizeLinear = (context: ComputeContext, attributes: DequantizeLinerAttributes): void => {
  // Validation throws on invalid inputs, so no program is created in that case.
  validateInputs(context.inputs, attributes);

  const programInfo = createDequantizeLinearProgramInfo(context.inputs, attributes);
  context.compute(programInfo);
};
/**
 * Converts the raw attribute record of a DequantizeLinear node into typed attributes with a cache key.
 *
 * @param attributes - raw attribute record; `axis` and `blockSize` are read from it as numbers.
 * @returns the result of `createAttributeWithCacheKey` for `{ axis, blockSize }`.
 */
export const parseDequantizeLinearAttributes = (attributes: Record<string, unknown>): DequantizeLinerAttributes => {
  const axis = attributes.axis as number;
  const blockSize = attributes.blockSize as number;
  return createAttributeWithCacheKey({ axis, blockSize });
};