onnxruntime/js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts
Yulong Wang abdc31de40
[js] change default formatter for JavaScript/TypeScript from clang-format to Prettier (#21728)
### Description

See
454996d496
for manual changes (excluded auto-generated formatting changes)

### Why

Because the toolset for the old clang-format is out of date, which reduces
development efficiency.

- The NPM package `clang-format` is already in maintenance mode and has not
been updated for 2 years.
- The VSCode extension for clang-format has not been maintained for a while,
and a recent Node.js security update stopped it from working at all on
Windows.

No one in the community seems interested in fixing those.

Choose Prettier as it is the most popular TS/JS formatter.

### How to merge

It's easy to break the build:
- Be careful of any new commits on main not included in this PR.
- Be careful that after this PR is merged, other PRs that already passed
CI can merge.

So, make sure there are no new commits before merging this one, and
invalidate js PRs that already passed CI, forcing them to merge with latest.
2024-08-14 16:51:22 -07:00

359 lines
13 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import { TensorView } from '../../tensor-view';
import { ComputeContext } from '../types';
import { createConv2DTransposeMatMulProgramInfo } from './3rd-party/conv_backprop_mm_webgpu';
import { createConvTranspose2DProgramInfo } from './3rd-party/conv_backprop_webgpu';
import { ConvAttributes } from './conv';
import { parseInternalActivationAttributes } from './fuse-utils';
import { createTransposeProgramInfo } from './transpose';
/**
 * Returns how much larger the un-padded transposed-convolution extent along one axis is
 * than the requested output size, i.e. the total amount of padding (head + tail) that
 * has to be removed to reach `outSize`. The result may be negative.
 *
 * @param inDim input size along the axis
 * @param stride stride along the axis
 * @param adj additive adjustment applied to the un-padded extent
 * @param kernel kernel size along the axis
 * @param dilation dilation along the axis
 * @param outSize requested output size along the axis
 */
const computeTotalPad = (
  inDim: number,
  stride: number,
  adj: number,
  kernel: number,
  dilation: number,
  outSize: number,
): number => {
  // Extent produced along this axis before any padding is subtracted.
  const unpaddedExtent = (inDim - 1) * stride + adj + (kernel - 1) * dilation + 1;
  return unpaddedExtent - outSize;
};
/**
 * Splits `totalPad` between the head and tail padding slots of `pads` (in place).
 *
 * - 'SAME_UPPER': the head gets floor(totalPad / 2), the tail gets the remainder.
 * - 'SAME_LOWER': the tail gets floor(totalPad / 2), the head gets the remainder.
 * - any other `autoPad` value: `pads` is left untouched.
 *
 * @param totalPad total padding to distribute
 * @param autoPad auto-pad mode name
 * @param pads padding array that is mutated
 * @param head index in `pads` of the leading padding for the axis
 * @param tail index in `pads` of the trailing padding for the axis
 */
const distributePadding = (totalPad: number, autoPad: string, pads: number[], head: number, tail: number) => {
  const headGetsSmallerHalf = autoPad === 'SAME_UPPER';
  if (!headGetsSmallerHalf && autoPad !== 'SAME_LOWER') {
    return;
  }
  const smallerHalf = Math.floor(totalPad / 2);
  const largerHalf = totalPad - smallerHalf;
  pads[head] = headGetsSmallerHalf ? smallerHalf : largerHalf;
  pads[tail] = headGetsSmallerHalf ? largerHalf : smallerHalf;
};
/**
 * Fills in the pads and the full output shape of a transposed convolution.
 *
 * Mutates its array arguments in place:
 * - `outputPadding`: when empty, receives one zero per spatial axis.
 * - `pads`: for 'SAME_UPPER' / 'SAME_LOWER', the head/tail entries of every spatial axis
 *   are overwritten; other auto-pad modes leave it unchanged.
 * - `outputShape`: when empty on entry, the spatial sizes are computed and appended;
 *   in every case the batch size and the output channel count are then inserted so the
 *   array ends up describing the whole output tensor.
 *
 * `kernelShape` is indexed with the same axis positions as `inputShape`, and the entry at
 * index 3 (channels-last) or 1 (otherwise) is multiplied by `group` to obtain the number
 * of output channels.
 */
const calculateOutputShapeAndPads = (
  inputShape: readonly number[],
  kernelShape: readonly number[],
  dilations: readonly number[],
  autoPad: string,
  group: number,
  pads: number[],
  strides: readonly number[],
  isChannelLast: boolean,
  outputPadding: number[],
  outputShape: number[],
) => {
  const spatialRank = inputShape.length - 2;
  // Decided once up front: pushing into outputShape below must not change the mode.
  const inferOutputShape = outputShape.length === 0;
  if (outputPadding.length === 0) {
    while (outputPadding.length < spatialRank) {
      outputPadding.push(0);
    }
  }

  const channelAxis = isChannelLast ? 3 : 1;
  const batchSize = inputShape[0];
  const outChannels = kernelShape[channelAxis] * group;
  // inputShape.length - spatialRank is always 2; channels-last layouts start one axis earlier.
  const firstSpatialAxis = inputShape.length - spatialRank - (isChannelLast ? 1 : 0);

  for (let axis = 0; axis < spatialRank; ++axis) {
    const dim = firstSpatialAxis + axis;
    const inSize = inputShape[dim];
    const kernelSize = kernelShape[dim];
    const stride = strides[axis];
    const dilation = dilations[axis];
    const targetSize = inferOutputShape ? inSize * stride : outputShape[axis];

    // NOTE(review): pads[axis] is what gets passed into computeTotalPad's `adj` slot here,
    // while outputPadding[axis] is used in the size formula below - confirm this is intended.
    const totalPad = computeTotalPad(inSize, stride, pads[axis], kernelSize, dilation, targetSize);
    distributePadding(totalPad, autoPad, pads, axis, axis + spatialRank);

    if (inferOutputShape) {
      const unpaddedExtent = stride * (inSize - 1) + outputPadding[axis] + (kernelSize - 1) * dilation + 1;
      outputShape.push(unpaddedExtent - pads[axis] - pads[axis + spatialRank]);
    }
  }

  // Turn the spatial-only shape into the full tensor shape.
  outputShape.unshift(batchSize);
  outputShape.splice(channelAxis, 0, outChannels);
};
/**
 * Attributes of the ConvTranspose operator: everything in ConvAttributes plus the two
 * ConvTranspose-specific fields below.
 */
export interface ConvTransposeAttributes extends ConvAttributes {
// Extra size added to each spatial axis when the output size is computed.
// validateInputs accepts either an empty array or one entry per spatial axis;
// an empty array is later filled with zeros by calculateOutputShapeAndPads.
readonly outputPadding: readonly number[];
// Requested output size per spatial axis. validateInputs accepts either an empty array
// or one entry per spatial axis; when empty, the output shape is computed instead.
readonly outputShape: readonly number[];
}
/**
 * Builds a copy of `attributes` in which every shape-related field has been resolved
 * against the actual input tensors. The original attributes object is never modified.
 *
 * - kernelShape: taken from the weight dims (from index 2 on) when it is empty or
 *   contains a zero; weight dims[0] and dims[1] are then inserted so it lines up with
 *   the layout selected by `format`.
 * - dilations / strides: replaced by all-ones when their entries sum to zero.
 * - pads / outputPadding / outputShape: copied, then completed by
 *   calculateOutputShapeAndPads.
 *
 * @param attributes attributes as parsed for the kernel
 * @param inputs input tensors; inputs[0] is the data tensor, inputs[1] the weight tensor
 * @returns a new attributes object with the adjusted fields
 */
const getAdjustedConvTransposeAttributes = <T extends ConvTransposeAttributes>(
  attributes: T,
  inputs: readonly TensorView[],
): T => {
  const inputShape = inputs[0].dims;
  const weightDims = inputs[1].dims;
  const isChannelsLast = attributes.format === 'NHWC';

  // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims
  const kernelUnspecified =
    attributes.kernelShape.length === 0 || attributes.kernelShape.reduce((a, b) => a * b, 1) === 0;
  const kernelShape = kernelUnspecified ? weightDims.slice(2) : attributes.kernelShape.slice();
  kernelShape.splice(0, 0, weightDims[0]);
  kernelShape.splice(isChannelsLast ? 3 : 1, 0, weightDims[1]);

  // A list whose entries sum to zero is treated as "not set" and defaults to all ones.
  const copyOrOnes = (values: readonly number[]): number[] =>
    values.reduce((a, b) => a + b, 0) === 0 ? new Array(inputShape.length - 2).fill(1) : values.slice();
  const dilations = copyOrOnes(attributes.dilations);
  const strides = copyOrOnes(attributes.strides);

  // Working copies; calculateOutputShapeAndPads completes them in place.
  const pads = attributes.pads.slice();
  const outputShape = attributes.outputShape.slice();
  const outputPadding = attributes.outputPadding.slice();

  // If outputShape is not specified in the attributes of this op, infer it from the parameters
  // Similarly, automatically infer pads if not specified
  calculateOutputShapeAndPads(
    inputShape,
    kernelShape,
    dilations,
    attributes.autoPad,
    attributes.group,
    pads,
    strides,
    isChannelsLast,
    outputPadding,
    outputShape,
  );

  // always return a new object so the original attributes are not modified
  return Object.assign({}, attributes, { kernelShape, pads, outputPadding, outputShape, dilations, strides });
};
/**
 * Converts the raw attribute bag handed to the kernel into ConvTransposeAttributes.
 *
 * `autoPad` arrives as a numeric index (missing means 0) and is mapped to its name;
 * `wIsConst` arrives as a function and is invoked here. The remaining fields are taken
 * over as-is, the internal activation attributes are merged in, and a cache key is built
 * from the format and the activation.
 *
 * @param attributes raw, untyped attribute record
 * @returns the typed attributes object
 */
export const parseConvTransposeAttributes = (attributes: Record<string, unknown>): ConvTransposeAttributes => {
  const activationAttributes = parseInternalActivationAttributes(attributes);
  // TODO : Make this generic enough to compute default attributes for multi-dimensional conv
  const autoPadNames = ['NOTSET', 'VALID', 'SAME_UPPER', 'SAME_LOWER'];
  const autoPadIndex = attributes.autoPad === undefined ? 0 : (attributes.autoPad as number);

  return {
    autoPad: autoPadNames[autoPadIndex],
    format: attributes.format as 'NHWC' | 'NCHW',
    dilations: attributes.dilations as [number, number],
    group: attributes.group as number,
    kernelShape: attributes.kernelShape as [number, number],
    outputPadding: attributes.outputPadding as [number, number, number, number],
    outputShape: attributes.outputShape as [number, number],
    pads: attributes.pads as [number, number, number, number],
    strides: attributes.strides as [number, number],
    wIsConst: (attributes.wIsConst as () => boolean)(),
    ...activationAttributes,
    cacheKey: `${attributes.format};${activationAttributes.activation};`,
  };
};
/**
 * Checks the inputs and attributes of a ConvTranspose call and throws an Error describing
 * the first violation found. Returns normally when everything is consistent.
 *
 * Refer to the link below for all input checks:
 * https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvTranspose
 *
 * @param inputs data tensor, weight tensor and optional bias tensor
 * @param attributes operator attributes to validate against the inputs
 * @throws Error when the number of inputs, the tensor ranks, the channel counts, the bias
 *   shape or the length of any per-axis attribute is invalid
 */
const validateInputs = (inputs: readonly TensorView[], attributes: ConvTransposeAttributes): void => {
  const hasValidInputCount = !!inputs && (inputs.length === 2 || inputs.length === 3);
  if (!hasValidInputCount) {
    throw new Error('Conv requires 2 or 3 inputs');
  }
  const [data, weight, bias] = inputs;
  const dataRank = data.dims.length;

  // TODO : Need to add support for multi-dimensional conv
  if (!(dataRank === 4 || dataRank === 3)) {
    throw new Error('currently only support 2-dimensional conv');
  }
  if (dataRank !== weight.dims.length) {
    throw new Error('filter does not have same dimension as input');
  }

  // FILTER_IN_CHANNEL should be equal to DATA_CHANNEL
  const channelAxis = attributes.format === 'NHWC' ? dataRank - 1 : 1;
  if (data.dims[channelAxis] !== weight.dims[0]) {
    throw new Error('FILTER_IN_CHANNEL should be equal to DATA_CHANNEL');
  }

  // if bias is provided it should be 1D and the number of elements should be equal to the number of feature maps
  const featureMaps = weight.dims[1] * attributes.group;
  if (inputs.length === 3 && (bias.dims.length !== 1 || bias.dims[0] !== featureMaps)) {
    throw new Error('invalid bias');
  }

  const spatialRank = dataRank - 2;
  // A per-axis attribute only counts as "set" when its entries sum to more than zero.
  const isSet = (values: readonly number[]): boolean => values.reduce((a, b) => a + b, 0) > 0;

  // wrong dilations dimension
  if (isSet(attributes.dilations) && attributes.dilations.length !== spatialRank) {
    throw new Error(`dilations should be ${spatialRank}D`);
  }
  // wrong strides dimension
  if (isSet(attributes.strides) && attributes.strides.length !== spatialRank) {
    throw new Error(`strides should be ${spatialRank}D`);
  }
  // wrong pads dimension
  if (isSet(attributes.pads) && attributes.pads.length !== spatialRank * 2) {
    throw new Error(`pads should be ${spatialRank * 2}D`);
  }
  // wrong output padding dimension
  const outputPaddingLength = attributes.outputPadding.length;
  if (outputPaddingLength !== spatialRank && outputPaddingLength !== 0) {
    throw new Error(`output_padding should be ${spatialRank}D`);
  }

  // if kernelShape is specified, its length must be 2 less than the dims length of the weights tensor
  // (the first 2 dims of the weights tensor are not spatial)
  const kernelShapeLength = attributes.kernelShape.length;
  if (isSet(attributes.kernelShape) && kernelShapeLength !== 0 && kernelShapeLength !== weight.dims.length - 2) {
    throw new Error('invalid kernel shape');
  }

  // as with kernelShape, must have same number of spatial dims as input
  const outputShapeLength = attributes.outputShape.length;
  if (outputShapeLength !== 0 && outputShapeLength !== spatialRank) {
    throw new Error('invalid output shape');
  }
};
// Axis permutation handed to createTransposeProgramInfo for the weight tensor.
// It reorders the weight from [C, M/group, KH, KW] to [KH, KW, M/group, C].
const weightTransposePerm = [2, 3, 1, 0];
/**
 * Runs a 2-D transposed convolution on `inputs`.
 *
 * Grouped convolutions and the single-input-channel / single-output-channel case go
 * through the direct kernel (createConvTranspose2DProgramInfo). Everything else is
 * computed with the matmul-based kernel, which needs the weight tensor transposed first;
 * for a constant weight the transposed tensor is cached in `context.kernelCustomData.wT`.
 *
 * @param context compute context used to dispatch the programs
 * @param inputs data tensor, weight tensor and optional bias tensor
 * @param attributes operator attributes (adjusted internally against the inputs)
 */
const convTranspose2d = (
  context: ComputeContext,
  inputs: readonly TensorView[],
  attributes: ConvTransposeAttributes,
): void => {
  const adjustedAttributes = getAdjustedConvTransposeAttributes(attributes, inputs);
  const { outputShape } = adjustedAttributes;
  const isChannelsLast = attributes.format === 'NHWC';
  const channelAxis = isChannelsLast ? 3 : 1;
  const outChannels = outputShape[channelAxis];
  const inputChannels = inputs[0].dims[channelAxis];

  // Switch to naive method when outChannels and inputChannels are very small. The matmul version
  // uses tile size 32x32, so in that case the utilization of the underlying execution units
  // would be very low.
  const useNaiveKernel = adjustedAttributes.group !== 1 || (outChannels === 1 && inputChannels === 1);
  if (useNaiveKernel) {
    context.compute(createConvTranspose2DProgramInfo(inputs, adjustedAttributes));
    return;
  }

  // Matmul problem sizes derived from the output shape and the weight dims.
  const [heightAxis, widthAxis] = isChannelsLast ? [1, 2] : [2, 3];
  const outputPixels = outputShape[heightAxis] * outputShape[widthAxis];
  const weightDims = inputs[1].dims;
  const [dimAOuter, dimBOuter] = isChannelsLast ? [outputPixels, outChannels] : [outChannels, outputPixels];
  const dimInner = weightDims[2] * weightDims[3] * inputChannels;
  const sequentialAccessByThreads = /* backend.adapterInfo.isIntel() */ true;

  // STEP.1: transpose weight (reuse the cached tensor when there is one)
  const cachedWeight = context.kernelCustomData.wT as TensorView | undefined;
  const transposedWeight =
    cachedWeight ??
    context.compute(createTransposeProgramInfo(inputs[1], weightTransposePerm), {
      inputs: [1],
      outputs: [attributes.wIsConst ? -2 : -1],
    })[0];
  if (attributes.wIsConst && !context.kernelCustomData.wT) {
    context.kernelCustomData.wT = transposedWeight;
  }

  // STEP.2: prepare reshaped inputs
  const convTransposeInputs = [inputs[0], transposedWeight];
  const hasBias = inputs.length === 3;
  if (hasBias) {
    const bias = inputs[2];
    // For the channels-first layout a 1-D bias is reshaped to [M, 1, 1].
    const needsReshape = !isChannelsLast && bias.dims.length === 1;
    convTransposeInputs.push(needsReshape ? bias.reshape([bias.dims[0], 1, 1]) : bias);
  }

  // STEP.3: compute matmul
  const programInfo = createConv2DTransposeMatMulProgramInfo(
    convTransposeInputs,
    adjustedAttributes,
    outputShape,
    dimAOuter,
    dimBOuter,
    dimInner,
    hasBias,
    sequentialAccessByThreads,
  );
  context.compute(programInfo, { inputs: convTransposeInputs });
};
/**
 * Runs a 1-D transposed convolution by lifting it to the 2-D case: a unit H axis is
 * inserted into the data and weight tensors, every per-axis attribute gets a matching
 * neutral entry for that axis, the 2-D kernel is dispatched, and the H axis is dropped
 * again from the reported output shape.
 *
 * `outputPadding` and `outputShape` are lifted as well when they are provided. Forwarding
 * them with a single entry would apply that entry to the synthetic H axis and leave the
 * W axis without a value.
 *
 * @param context compute context; `context.inputs` holds data, weight and optional bias
 * @param attributes operator attributes as parsed for the 1-D operator
 */
const convTranspose1d = (context: ComputeContext, attributes: ConvTransposeAttributes): void => {
  // extend the input to 2D by adding H dimension
  const isChannelLast = attributes.format === 'NHWC';
  const inputs = [
    context.inputs[0].reshape(
      isChannelLast
        ? // [N, W, C] -> [N, H=1, W, C]
          [context.inputs[0].dims[0], 1, context.inputs[0].dims[1], context.inputs[0].dims[2]]
        : // [N, C, W] -> [N, C, H=1, W]
          [context.inputs[0].dims[0], context.inputs[0].dims[1], 1, context.inputs[0].dims[2]],
    ),
    // weight: [C, M/group, kW] -> [C, M/group, kH=1, kW]
    context.inputs[1].reshape([context.inputs[1].dims[0], context.inputs[1].dims[1], 1, context.inputs[1].dims[2]]),
  ];
  if (context.inputs.length === 3) {
    inputs.push(context.inputs[2]);
  }

  // Resolve unset (empty or zero) 1-D attributes to their defaults.
  let kernelShape = attributes.kernelShape;
  if (kernelShape.length === 0 || kernelShape[0] === 0) {
    kernelShape = [context.inputs[1].dims[2]];
  }
  let dilations = attributes.dilations;
  if (dilations.length === 0 || dilations[0] === 0) {
    dilations = [1];
  }
  let strides = attributes.strides;
  if (strides.length === 0 || strides[0] === 0) {
    strides = [1];
  }
  let pads = attributes.pads;
  if (pads.length === 0) {
    pads = [0, 0];
  }

  // Prepend the neutral entry for the synthetic H axis.
  pads = [0, pads[0], 0, pads[1]];
  strides = [1].concat(strides);
  dilations = [1].concat(dilations);
  kernelShape = [1].concat(kernelShape);
  // An empty list means "not specified" downstream, so it must stay empty. Otherwise the H axis
  // gets no extra output padding and an output size of 1 (input 1, kernel 1, stride 1, no pads).
  const outputPadding = attributes.outputPadding.length === 0 ? [] : [0, ...attributes.outputPadding];
  const outputShape = attributes.outputShape.length === 0 ? [] : [1, ...attributes.outputShape];

  const adjustedAttributes = getAdjustedConvTransposeAttributes(
    { ...attributes, pads, strides, dilations, kernelShape, outputPadding, outputShape },
    inputs,
  );
  context.compute(
    // Drop the synthetic H axis from the shape reported for the output tensor.
    createConvTranspose2DProgramInfo(inputs, adjustedAttributes, (outputShape) =>
      isChannelLast
        ? [outputShape[0], outputShape[2], outputShape[3]]
        : [outputShape[0], outputShape[1], outputShape[3]],
    ),
  );
};
/**
 * Entry point of the ConvTranspose operator: validates the inputs, then dispatches to the
 * 1-D implementation when the data tensor has 3 dims and to the 2-D implementation otherwise.
 *
 * @param context compute context carrying the input tensors
 * @param attributes parsed operator attributes
 * @throws Error when validateInputs rejects the inputs or attributes
 */
export const convTranspose = (context: ComputeContext, attributes: ConvTransposeAttributes): void => {
  validateInputs(context.inputs, attributes);
  const isOneDimensional = context.inputs[0].dims.length === 3;
  if (isOneDimensional) {
    convTranspose1d(context, attributes);
    return;
  }
  convTranspose2d(context, context.inputs, attributes);
};