mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
### Description
See
454996d496
for manual changes (excluded auto-generated formatting changes)
### Why
Because the tooling around the old clang-format setup is out of date,
which reduces development efficiency.
- The NPM package `clang-format` is already in maintenance mode and has
not been updated in 2 years.
- The VSCode extension for clang-format has not been maintained for a while,
and a recent Node.js security update stopped it from working at all on
Windows.
No one in the community seems interested in fixing those.
Prettier was chosen because it is the most popular TS/JS formatter.
### How to merge
It's easy to break the build:
- Be careful of any new commits on main not included in this PR.
- Be careful that after this PR is merged, other PRs that already passed
CI can merge.
So, make sure there are no new commits before merging this one, and
invalidate JS PRs that already passed CI, forcing them to merge with the latest main.
160 lines
5.6 KiB
TypeScript
160 lines
5.6 KiB
TypeScript
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
// TODO: this is the same naive implementation we use for reduce that has
|
|
// performance limitations when the reduced axis is long. Need to add
|
|
// an optimized codepath for this.
|
|
|
|
import { DataType } from '../../../wasm-common';
|
|
import { TensorView } from '../../tensor-view';
|
|
import { ShapeUtil } from '../../util';
|
|
import { AttributeWithCacheKey, createAttributeWithCacheKey } from '../attribute-with-cache-key';
|
|
import { ComputeContext, ProgramInfo } from '../types';
|
|
|
|
import {
|
|
getMaxComponents,
|
|
inputVariable,
|
|
outputVariable,
|
|
ShaderHelper,
|
|
sumVector,
|
|
tensorTypeToWsglStorageType,
|
|
} from './common';
|
|
|
|
/**
 * Ensures the Softmax operator was handed exactly one input tensor.
 *
 * @param inputs - the tensors passed to the operator.
 * @throws Error when `inputs` is missing or does not contain exactly one tensor.
 */
const validateInputs = (inputs: readonly TensorView[]): void => {
  const inputCount = inputs ? inputs.length : 0;
  if (inputCount === 1) {
    return;
  }
  throw new Error('Softmax op requires 1 input.');
};
|
|
|
|
/**
 * Attributes of the Softmax operator.
 */
export interface SoftmaxAttributes extends AttributeWithCacheKey {
  /** Axis to apply softmax along; a negative value is counted back from the input's rank. */
  readonly axis: number;
}
|
|
|
|
/**
 * Builds the program description (shader source, dispatch size, uniforms) for Softmax over the
 * last axis of `input`.
 *
 * The input is treated as a `rows x cols` matrix, where `cols` is the size of the softmax axis and
 * `rows` is the total element count divided by `cols`. One workgroup is dispatched per row. Inside
 * the shader each row is processed in three passes:
 *   1. find the row maximum (per-invocation max, then a reduction through `threadShared`),
 *   2. sum `exp(value - rowMax)` over the row (again reduced through `threadShared`),
 *   3. write `exp(value - rowMax) / rowSum` for every element.
 *
 * NOTE(review): the shader hard-codes `wg = WG` and sizes `threadShared` with it, so it assumes the
 * workgroup size chosen by `shaderHelper.mainStart()` equals `WG` - confirm against ShaderHelper.
 *
 * @param input - the tensor to normalize.
 * @param attributes - operator attributes; `axis` may be negative (counted from the rank).
 * @returns the program info consumed by `ComputeContext.compute`.
 * @throws Error when `axis` is outside `[-rank, rank - 1]`, or when it is not the last axis.
 */
const createSoftmaxProgramInfo = (input: TensorView, attributes: SoftmaxAttributes): ProgramInfo => {
  const shape = input.dims;
  const rank = shape.length;
  const outputSize = ShapeUtil.size(shape);
  const WG = 64;

  // Reject out-of-range axes explicitly. Without this check `shape[axis]` would be `undefined` and
  // NaN would silently flow into the dispatch size and the `packedCols` uniform. The negated form
  // also rejects NaN / undefined.
  const requestedAxis = attributes.axis;
  if (!(requestedAxis >= -rank && requestedAxis < rank)) {
    throw new Error(`softmax axis ${requestedAxis} is out of range for an input of rank ${rank}.`);
  }
  const axis = requestedAxis < 0 ? rank + requestedAxis : requestedAxis;
  if (axis < rank - 1) {
    throw new Error('softmax only supports last axis for now.');
  }

  const cols = shape[axis];
  const rows = outputSize / cols;
  // Each shader element packs `components` scalars, so a row holds `packedCols` elements.
  const components = getMaxComponents(cols);
  const packedCols = cols / components;

  // Returns a WGSL expression that reduces a vector named `name` with `vectorSize` lanes to its
  // largest lane; a scalar (any other size) is returned unchanged.
  const maxVector = (name: string, vectorSize: number): string => {
    switch (vectorSize) {
      case 4:
        return `max(max(${name}.x, ${name}.y), max(${name}.z, ${name}.w))`;
      case 3:
        return `max(max(${name}.x, ${name}.y), ${name}.z)`;
      case 2:
        return `max(${name}.x, ${name}.y)`;
      default:
        return name;
    }
  };

  const x = inputVariable('x', input.dataType, input.dims, components);
  const output = outputVariable('result', input.dataType, input.dims, components);
  const valueType = x.type.value;

  // 6.2.4 in wgsl spec
  // The running maximum starts from a very large negative literal: an `f`-suffixed one when the
  // storage type is 'f32', otherwise an `h`-suffixed one (presumably f16 - confirm).
  const threadMaxDecl =
    tensorTypeToWsglStorageType(input.dataType) === 'f32'
      ? `var threadMax = ${valueType}(-3.402823e+38f);`
      : `var threadMax = ${valueType}(-65504.0h);`;

  const getShaderSource = (shaderHelper: ShaderHelper) => `
      var<workgroup> rowMaxShared : ${valueType};
      var<workgroup> rowSumShared : ${valueType};
      var<workgroup> threadShared : array<${valueType}, ${WG}>;

      fn getValue(row: i32, col: i32, row_stride: i32) -> ${valueType} {
        let index = row * row_stride + col;
        return x[index];
      }

      fn setValue(row: i32, col: i32, row_stride: i32, value: ${valueType}) {
        let index = row * row_stride + col;
        result[index] = value;
      }
      ${shaderHelper.registerUniform('packedCols', 'i32').declareVariables(x, output)}
      ${shaderHelper.mainStart()}
        let gindex = i32(global_idx);
        let lindex = i32(local_idx);
        const wg = ${WG};
        let row = gindex / wg;
        let cols = uniforms.packedCols;
        let row_stride : i32 = uniforms.packedCols;

        // find the rows max
        ${threadMaxDecl}
        for (var col = lindex; col < cols; col += wg) {
          let value = getValue(row, col, row_stride);
          threadMax = max(threadMax, value);
        }
        if (lindex < cols) {
          threadShared[lindex] = threadMax;
        }
        workgroupBarrier();

        var reduceSize = min(cols, wg);
        for (var currSize = reduceSize >> 1; currSize > 0; currSize = reduceSize >> 1) {
          reduceSize = currSize + (reduceSize & 1);
          if (lindex < currSize) {
            threadShared[lindex] = max(threadShared[lindex], threadShared[lindex + reduceSize]);
          }
          workgroupBarrier();
        }
        if (lindex == 0) {
          rowMaxShared = ${valueType}(${maxVector('threadShared[0]', components)});
        }
        workgroupBarrier();

        // find the rows sum
        var threadSum = ${valueType}(0.0);
        for (var col = lindex; col < cols; col += wg) {
          let subExp = exp(getValue(row, col, row_stride) - rowMaxShared);
          threadSum += subExp;
        }
        threadShared[lindex] = threadSum;
        workgroupBarrier();

        for (var currSize = wg >> 1; currSize > 0; currSize = currSize >> 1) {
          if (lindex < currSize) {
            threadShared[lindex] = threadShared[lindex] + threadShared[lindex + currSize];
          }
          workgroupBarrier();
        }
        if (lindex == 0) {
          rowSumShared = ${valueType}(${sumVector('threadShared[0]', components)});
        }
        workgroupBarrier();

        // calculate final value for each element in the row
        for (var col = lindex; col < cols; col += wg) {
          let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared;
          setValue(row, col, row_stride, value);
        }
      }`;

  return {
    name: 'Softmax',
    // The generated shader text varies with `components`, so it is part of the cache hint.
    shaderCache: { hint: `${components}`, inputDependencies: ['type'] },
    getRunData: () => ({
      outputs: [{ dims: shape, dataType: input.dataType }],
      dispatchGroup: { x: rows },
      programUniforms: [{ type: DataType.int32, data: packedCols }],
    }),
    getShaderSource,
  };
};
|
|
|
|
/**
 * Entry point of the Softmax operator: validates the inputs, builds the program for the first
 * input tensor and submits it to the compute context.
 *
 * @param context - compute context providing the input tensors and the `compute` call.
 * @param attributes - parsed Softmax attributes.
 * @throws Error when the inputs are invalid or the program cannot be built for the given axis.
 */
export const softmax = (context: ComputeContext, attributes: SoftmaxAttributes): void => {
  validateInputs(context.inputs);

  const programInfo = createSoftmaxProgramInfo(context.inputs[0], attributes);
  context.compute(programInfo);
};
|
|
|
|
/**
 * Converts a raw attribute record into `SoftmaxAttributes`.
 *
 * Only the `axis` entry is read; it is taken as a number without further validation and wrapped
 * with `createAttributeWithCacheKey`.
 *
 * @param attributes - raw attribute record of the operator.
 * @returns the Softmax attributes produced by `createAttributeWithCacheKey`.
 */
export const parseSoftmaxAttributes = (attributes: Record<string, unknown>): SoftmaxAttributes => {
  const axis = attributes.axis as number;
  return createAttributeWithCacheKey({ axis });
};
|