mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
### Description <!-- Describe your changes. --> Added uniforms to Reduce op ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Improve performance.
374 lines
14 KiB
TypeScript
374 lines
14 KiB
TypeScript
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
import {DataType} from '../../../wasm-common';
|
|
import {TensorView} from '../../tensor-view';
|
|
import {ShapeUtil} from '../../util';
|
|
import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
|
|
import {ComputeContext, ProgramInfo, ProgramShaderCacheInfo} from '../types';
|
|
|
|
import {createTensorShapeVariables, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
|
|
import {reduceL1Shared, reduceL2Shared, reduceLogSumExpShared, reduceLogSumShared, reduceMaxShared, reduceMeanShared, reduceMinShared, reduceProdShared, reduceSumShared, reduceSumSquareShared} from './reduce-shared';
|
|
|
|
/**
 * Checks the number of inputs of a Reduce op and the rank of the optional second input.
 *
 * @param inputs - inputs of the node; when a second input is present it is expected to be 1-D.
 * @throws Error when there are not exactly 1 or 2 inputs, or when the second input is not 1-D.
 */
const validateInputs = (inputs: readonly TensorView[]): void => {
  const count = inputs ? inputs.length : 0;
  if (count < 1 || count > 2) {
    throw new Error('Reduce op requires 1 or 2 inputs.');
  }

  const hasAxesInput = count === 2;
  if (hasAxesInput && inputs[1].dims.length !== 1) {
    throw new Error('Invalid axes input dims.');
  }
};
|
|
|
|
/**
 * Attributes shared by the Reduce* operators in this file.
 */
export interface ReduceAttributes extends AttributeWithCacheKey {
  /** Axes to reduce. They are passed through `ShapeUtil.normalizeAxes` before a program is built. */
  axes: number[];

  /** When true, each reduced axis is kept in the output shape with size 1; otherwise it is dropped. */
  keepDims: boolean;

  /** When true and `axes` is empty, no axis is reduced; when false, an empty `axes` reduces every axis. */
  noopWithEmptyAxes: boolean;
}
|
|
|
|
/**
 * Produces the WGSL fragments that `createReduceProgramInfo` stitches into a reduce shader.
 *
 * The returned tuple is consumed positionally:
 *  - [0] and [1]: emitted, in that order, after the non-reduced indices are copied and before the loops;
 *  - [2]: innermost body of the nested loops over the reduced axes. If this text contains `last_index`,
 *         every generated loop also declares `let last_index = j<k>;`;
 *  - [3]: emitted after the loops;
 *  - [4...]: optional; when present these lines are joined with newlines and emitted instead of the
 *         default `output.setByOffset('global_idx', 'value')`.
 */
export type ReduceOp = (input: IndicesHelper, output: IndicesHelper, axes: readonly number[]) =>
    [string, string, string, string, ...string[]];
|
|
|
|
/**
 * ReduceOp that performs no reduction: it only loads the input element addressed by `input_indices`
 * into `value`.
 */
const noOp: ReduceOp = (input) => {
  const loadValue = `var value = ${input.getByIndices('input_indices')};`;
  return ['', '', loadValue, ''];
};
|
|
/**
 * Builds the ProgramInfo of a "naive" reduce kernel: each invocation handles one output element and
 * loops over every reduced axis of the input.
 *
 * @param name - program name.
 * @param shaderCache - shader cache info, forwarded unchanged.
 * @param inputs - input tensors; only `inputs[0]` (the data tensor) is read here.
 * @param reduceOp - generator of the WGSL fragments (see `ReduceOp`).
 * @param axesInput - axes to reduce; normalized with `ShapeUtil.normalizeAxes`.
 * @param outputDataType - data type of the output tensor.
 * @param keepDims - keep reduced axes in the output shape with size 1.
 * @param noopWithEmptyAxes - when false, an empty axes list means "reduce over all axes".
 * @returns the program info (shader source generator and run data).
 */
export const createReduceProgramInfo =
    (name: string, shaderCache: ProgramShaderCacheInfo, inputs: readonly TensorView[], reduceOp: ReduceOp,
     axesInput: number[], outputDataType: DataType, keepDims = false, noopWithEmptyAxes = false): ProgramInfo => {
      const inputShape = inputs[0].dims;
      const inputRank = inputShape.length;
      const axes = ShapeUtil.normalizeAxes(axesInput, inputRank);
      const reduceOnAllAxes = !noopWithEmptyAxes && axes.length === 0;
      const isReducedAxis = (axis: number): boolean => reduceOnAllAxes || axes.indexOf(axis) >= 0;

      // Output shape: non-reduced dims are copied; reduced dims become 1 (keepDims) or are dropped.
      const outputShape: number[] = [];
      for (let i = 0; i < inputRank; i++) {
        if (!isReducedAxis(i)) {
          outputShape.push(inputShape[i]);
        } else if (keepDims) {
          outputShape.push(1);
        }
      }
      const outputRank = outputShape.length;
      const outputSize = ShapeUtil.size(outputShape);

      const getShaderSource = (shaderHelper: ShaderHelper) => {
        const input = inputVariable('_A', inputs[0].dataType, inputRank);
        const output = outputVariable('output', outputDataType, outputRank);
        const ops = reduceOp(input, output, axes);
        const needsLastIndex = ops[2].includes('last_index');

        const idxCopy: string[] = [];  // copies output indices into the matching input indices
        let reduceOps = ops[2];
        let outputAxis = 0;  // position in the output indices that corresponds to input axis k
        for (let k = 0; k < inputRank; k++) {
          if (isReducedAxis(k)) {
            // Wrap what has been generated so far in a loop over axis k.
            // NOTE(review): the loop bound is baked into the shader text from inputShape[k]; a shader
            // cache key that only tracks the input rank would reuse this bound for other dims - confirm
            // that the `shaderCache` passed in covers the input dims.
            reduceOps = `for(var j${k}: u32 = 0; j${k} < ${inputShape[k]}; j${k}++) {
                  ${needsLastIndex ? `let last_index = j${k};` : ''}
                  ${input.indicesSet('input_indices', k, `j${k}`)}
                  ${reduceOps}
                }`;
            if (keepDims) {
              // a kept reduced axis still occupies one slot in the output indices
              outputAxis++;
            }
          } else {
            idxCopy.push(`${input.indicesSet('input_indices', k, output.indicesGet('output_indices', outputAxis))};`);
            outputAxis++;
          }
        }

        return `

        ${shaderHelper.registerUniform('output_size', 'u32').declareVariables(input, output)}

        ${shaderHelper.mainStart()}
          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
          var input_indices: ${input.type.indices};
          let output_indices = ${output.offsetToIndices('global_idx')};

          ${idxCopy.join('\n')}
          ${ops[0]}       // init ops for reduce max/min
          ${ops[1]}
          ${reduceOps}
          ${ops[3]}
          ${ops.length === 4 ? output.setByOffset('global_idx', 'value') : ops.slice(4).join('\n')}
        }`;
      };

      return {
        name,
        shaderCache,
        getShaderSource,
        getRunData: () => ({
          outputs: [{dims: outputShape, dataType: outputDataType}],
          dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
          // uniforms: output_size first, then the shape variables of the input and of the output
          programUniforms: [
            {type: 'uint32', data: outputSize}, ...createTensorShapeVariables(inputShape),
            ...createTensorShapeVariables(outputShape)
          ]
        }),
      };
    };
|
|
|
|
/**
 * Builds ReduceAttributes whose axes come from the second input tensor instead of the node attributes.
 *
 * @param inputs - node inputs; `inputs[1]` is read through `getBigInt64Array()` as the axes tensor.
 * @param attributes - attributes supplying `keepDims` and `noopWithEmptyAxes`.
 * @returns new attributes (with cache key) carrying the axes read from `inputs[1]`.
 */
export const createReduceAttributesFromInputs =
    (inputs: readonly TensorView[], attributes: ReduceAttributes): ReduceAttributes => {
      const axesTensor = inputs[1];
      // An empty axes tensor yields an empty list; its data is not touched in that case.
      const axes: number[] =
          axesTensor.dims[0] > 0 ? Array.from(axesTensor.getBigInt64Array(), (v) => Number(v)) : [];
      const {keepDims, noopWithEmptyAxes} = attributes;
      return createAttributeWithCacheKey({axes, keepDims, noopWithEmptyAxes});
    };
|
|
|
|
/**
 * Resolves the effective attributes (axes may come from the second input) and runs the naive reduce
 * program for the given op.
 *
 * @param context - compute context providing the inputs and `compute()`.
 * @param name - program name.
 * @param attributes - node attributes; used as-is when there is a single input.
 * @param reduceOp - WGSL fragment generator; replaced by `noOp` when `noopWithEmptyAxes` is set and
 *     no axes are given.
 */
const runReduceProgram =
    (context: ComputeContext, name: string, attributes: ReduceAttributes, reduceOp: ReduceOp): void => {
      const inputs = context.inputs;
      const updatedAttributes: ReduceAttributes =
          inputs.length === 1 ? attributes : createReduceAttributesFromInputs(inputs, attributes);
      const isNoop = updatedAttributes.noopWithEmptyAxes && updatedAttributes.axes.length === 0;

      context.compute(
          createReduceProgramInfo(
              // The generated shader embeds the extents of the reduced axes as loop bounds (and
              // ReduceMean embeds the element count), so the cached shader is only valid for the exact
              // input dims: the cache key must depend on 'dims', not just on 'rank'.
              name, {hint: updatedAttributes.cacheKey, inputDependencies: ['dims']}, [inputs[0]],
              isNoop ? noOp : reduceOp, updatedAttributes.axes, inputs[0].dataType, updatedAttributes.keepDims,
              updatedAttributes.noopWithEmptyAxes),
          {inputs: [0]});
    };
|
|
|
|
/**
 * Naive ReduceLogSum: accumulates the sum of the elements, then takes `log` of it.
 */
const reduceLogSumNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const logSumOp: ReduceOp = (input, output) => {
    const init = `var value = ${output.type.storage}(0);`;
    const accumulate = `value += ${input.getByIndices('input_indices')};`;
    return [init, '', accumulate, 'value = log(value);'];
  };
  runReduceProgram(context, 'ReduceLogSum', attributes, logSumOp);
};
|
|
|
|
/**
 * Naive ReduceL1: accumulates the sum of the absolute values of the elements.
 */
const reduceL1Naive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const l1Op: ReduceOp = (input, output) => {
    const init = `var value = ${output.type.storage}(0);`;
    const accumulate = `value += abs(${input.getByIndices('input_indices')});`;
    return [init, '', accumulate, ''];
  };
  runReduceProgram(context, 'ReduceL1', attributes, l1Op);
};
|
|
|
|
/**
 * Naive ReduceL2: accumulates the sum of squares of the elements, then takes `sqrt` of it.
 */
const reduceL2Naive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const l2Op: ReduceOp = (input, output) => {
    const valueType = output.type.value;
    const init = `var t = ${valueType}(0); var value = ${valueType}(0);`;
    const accumulate = `t = ${input.getByIndices('input_indices')}; value += (t * t);`;
    return [init, '', accumulate, 'value = sqrt(value);'];
  };
  runReduceProgram(context, 'ReduceL2', attributes, l2Op);
};
|
|
|
|
/**
 * Naive ReduceLogSumExp: accumulates the sum of `exp` of the elements, then takes `log` of it.
 */
const reduceLogSumExpNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const logSumExpOp: ReduceOp = (input, output) => {
    const init = `var value = ${output.type.storage}(0);`;
    const accumulate = `value += exp(${input.getByIndices('input_indices')});`;
    return [init, '', accumulate, 'value = log(value);'];
  };
  runReduceProgram(context, 'ReduceLogSumExp', attributes, logSumExpOp);
};
|
|
|
|
/**
 * Naive ReduceMax: seeds `value` with the first element along the reduced axes, then folds with `max`.
 */
const reduceMaxNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const maxOp: ReduceOp = (input, _output, axes) => {
    // An empty axes list means every axis is reduced.
    const reduceEveryAxis = axes.length === 0;
    const resetToFirst = [];
    for (let axis = 0; axis < input.rank; axis++) {
      if (reduceEveryAxis || axes.indexOf(axis) >= 0) {
        // point the reduced axis at its first element so `value` can be seeded from real data
        resetToFirst.push(input.indicesSet('input_indices', axis, 0));
      }
    }

    const seed = `var value = ${input.getByIndices('input_indices')};`;
    const fold = `value = max(value, ${input.getByIndices('input_indices')});`;
    return [`${resetToFirst.join('\n')}`, seed, fold, ''];
  };
  runReduceProgram(context, 'ReduceMax', attributes, maxOp);
};
|
|
|
|
/**
 * Naive ReduceMean: accumulates the elements in f32 and divides by the number of reduced elements.
 */
const reduceMeanNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const meanOp: ReduceOp = (input, output, axes) => {
    // An empty axes list means every axis is reduced.
    const reduceEveryAxis = axes.length === 0;
    let reducedCount = 1.0;
    for (let axis = 0; axis < input.rank; axis++) {
      if (reduceEveryAxis || axes.indexOf(axis) >= 0) {
        // TODO: this depends on the input dims. If we want to use uniform, this need to be updated.
        reducedCount *= context.inputs[0].dims[axis];
      }
    }

    const accumulate = `sum += f32(${input.getByIndices('input_indices')});`;
    // the divisor is written into the shader text as a literal
    const finalize = `let value = ${output.type.value}(sum / ${reducedCount});`;
    return ['var sum = f32(0);', '', accumulate, finalize];
  };
  runReduceProgram(context, 'ReduceMean', attributes, meanOp);
};
|
|
|
|
/**
 * Naive ReduceMin: seeds `value` with the first element along the reduced axes, then folds with `min`.
 */
const reduceMinNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const reduceOp: ReduceOp = (input, _output, axes) => {
    const idxZero = [];
    for (let k = 0; k < input.rank; k++) {
      if (axes.indexOf(k) >= 0 || axes.length === 0) {
        // First element of the reduced axis. Use the indices helper (as ReduceMax and the loop
        // generator do) instead of hand-written `input_indices[k] = 0;`, so the statement matches
        // whatever representation the helper uses for `input_indices`.
        idxZero.push(input.indicesSet('input_indices', k, 0));
      }
    }

    return [
      `${idxZero.join('\n')}`,
      `var value = ${input.getByIndices('input_indices')};`,
      `value = min(value, ${input.getByIndices('input_indices')});`,
      '',
    ];
  };
  runReduceProgram(context, 'ReduceMin', attributes, reduceOp);
};
|
|
|
|
/**
 * Naive ReduceProd: starts from 1 and multiplies in every element.
 */
const reduceProdNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const prodOp: ReduceOp = (input, output) => {
    const init = `var value = ${output.type.storage}(1);`;
    const accumulate = `value *= ${input.getByIndices('input_indices')};`;
    return [init, '', accumulate, ''];
  };
  runReduceProgram(context, 'ReduceProd', attributes, prodOp);
};
|
|
|
|
/**
 * Naive ReduceSum: starts from 0 and adds in every element.
 */
const reduceSumNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const sumOp: ReduceOp = (input, output) => {
    const init = `var value = ${output.type.storage}(0);`;
    const accumulate = `value += ${input.getByIndices('input_indices')};`;
    return [init, '', accumulate, ''];
  };
  runReduceProgram(context, 'ReduceSum', attributes, sumOp);
};
|
|
|
|
/**
 * Naive ReduceSumSquare: accumulates the sum of the squares of the elements.
 */
const reduceSumSquareNaive = (context: ComputeContext, attributes: ReduceAttributes): void => {
  validateInputs(context.inputs);
  const sumSquareOp: ReduceOp = (input, output) => {
    const valueType = output.type.value;
    const init = `var t = ${valueType}(0); var value = ${valueType}(0);`;
    const accumulate = `t = ${input.getByIndices('input_indices')}; value += t * t;`;
    return [init, '', accumulate, ''];
  };
  runReduceProgram(context, 'ReduceSumSquare', attributes, sumSquareOp);
};
|
|
|
|
/**
 * Heuristic choosing between the naive reduce kernel and the shared one.
 *
 * @param shape - dims of the input tensor.
 * @param axes - axes to reduce; negative values count from the last dimension.
 * @param noopWithEmptyAxes - returned as-is when `axes` is empty.
 * @returns true when the naive kernel should be used: few elements per reduction (< 32) and many
 *     output elements (> 1024).
 */
const useNaiveReduceMethod =
    (shape: readonly number[], axes: readonly number[], noopWithEmptyAxes: boolean): boolean => {
      if (axes.length === 0) {
        return noopWithEmptyAxes;
      }

      const rank = shape.length;
      // Normalize negative axes so that they match the dimension indices below.
      const reducedAxes = new Set(axes.map((axis) => (axis < 0 ? axis + rank : axis)));

      let outputSize = 1;
      let reduceSize = 1;
      // Walk every dimension of the input (not just the first `axes.length` ones): reduced dims
      // contribute to the work per output element, the others to the number of output elements.
      for (let dim = 0; dim < rank; dim++) {
        if (reducedAxes.has(dim)) {
          reduceSize *= shape[dim];
        } else {
          outputSize *= shape[dim];
        }
      }

      // The condition data is very rough, although considering the count of Execution Unit (EU), the potential
      // work groups in a EU and the counts of loops in the naive and shared methods, also doing experiments
      // on some machines.
      return reduceSize < 32 && outputSize > 1024;
    };
|
|
|
|
/**
 * ReduceMean entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceMean = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run =
      useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceMeanNaive : reduceMeanShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceL1 entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceL1 = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run = useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceL1Naive : reduceL1Shared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceL2 entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceL2 = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run = useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceL2Naive : reduceL2Shared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceLogSumExp entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise
 * the implementation imported from './reduce-shared'.
 */
export const reduceLogSumExp = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run = useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceLogSumExpNaive :
                                                                                     reduceLogSumExpShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceMax entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceMax = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run =
      useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceMaxNaive : reduceMaxShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceMin entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceMin = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run =
      useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceMinNaive : reduceMinShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceProd entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceProd = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run =
      useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceProdNaive : reduceProdShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceSum entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run =
      useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceSumNaive : reduceSumShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceSumSquare entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise
 * the implementation imported from './reduce-shared'.
 */
export const reduceSumSquare = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run = useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceSumSquareNaive :
                                                                                     reduceSumSquareShared;
  run(context, attributes);
};
|
|
|
|
/**
 * ReduceLogSum entry point: runs the naive kernel when `useNaiveReduceMethod` selects it, otherwise the
 * implementation imported from './reduce-shared'.
 */
export const reduceLogSum = (context: ComputeContext, attributes: ReduceAttributes): void => {
  const {axes, noopWithEmptyAxes} = attributes;
  const run = useNaiveReduceMethod(context.inputs[0].dims, axes, noopWithEmptyAxes) ? reduceLogSumNaive :
                                                                                     reduceLogSumShared;
  run(context, attributes);
};
|