From c79307e7b4bee3d7f4c3406a6559faaaf87c2fcb Mon Sep 17 00:00:00 2001
From: Sunghoon <35605090+hanbitmyths@users.noreply.github.com>
Date: Tue, 26 Oct 2021 23:58:50 -0700
Subject: [PATCH] [js/web] support opset-13 of softmax (#9493)

* add p50 in test

* support opset-13 of softmax

* update operators.md

* resolve comments

* fix lint and format

Co-authored-by: Yulong Wang <yulongw@microsoft.com>
---
 js/web/docs/operators.md                      |   2 +-
 .../onnxjs/backends/webgl/op-resolve-rules.ts |   3 +-
 .../lib/onnxjs/backends/webgl/ops/softmax.ts  | 152 ++++++++++++------
 js/web/test/suite-test-list.jsonc             |  12 +-
 4 files changed, 115 insertions(+), 54 deletions(-)

diff --git a/js/web/docs/operators.md b/js/web/docs/operators.md
index c675916d78..fb7e7ab6d6 100644
--- a/js/web/docs/operators.md
+++ b/js/web/docs/operators.md
@@ -152,7 +152,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Sinh](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Sinh) |  |
 | [Size](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Size) |  |
 | [Slice](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Slice) | [1-9](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Slice-1), [10](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Slice-10), [11-12](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Slice-11), [13+](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Slice-13) |
-| [Softmax](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Softmax) | [1-10](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-1), [11-12](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-11) |
+| [Softmax](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Softmax) | [1-10](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-1), [11-12](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-11), [13+](https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-13) |
 | [SoftmaxCrossEntropyLoss](https://github.com/onnx/onnx/blob/master/docs/Operators.md#SoftmaxCrossEntropyLoss) |  |
 | [Softplus](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Softplus) |  |
 | [Softsign](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Softsign) |  |
diff --git a/js/web/lib/onnxjs/backends/webgl/op-resolve-rules.ts b/js/web/lib/onnxjs/backends/webgl/op-resolve-rules.ts
index 63438201dc..e4c2ea34d5 100644
--- a/js/web/lib/onnxjs/backends/webgl/op-resolve-rules.ts
+++ b/js/web/lib/onnxjs/backends/webgl/op-resolve-rules.ts
@@ -26,7 +26,7 @@ import {reshape} from './ops/reshape';
 import {parseResizeAttributesV10, parseResizeAttributesV11, resize} from './ops/resize-packed';
 import {shape} from './ops/shape';
 import {parseSliceAttributes, slice, sliceV10} from './ops/slice';
-import {parseSoftmaxAttributes, softmax} from './ops/softmax';
+import {parseSoftmaxAttributes, parseSoftmaxAttributesV13, softmax, softmaxV13} from './ops/softmax';
 import {parseSplitAttributes, split} from './ops/split';
 import {parseSqueezeAttributes, squeeze, squeezeV13} from './ops/squeeze';
 import {sum} from './ops/sum';
@@ -102,6 +102,7 @@ export const WEBGL_OP_RESOLVE_RULES: readonly OpSet.ResolveRule[] = [
   ['Slice', '', '1-9', slice, parseSliceAttributes],
   // The "semantic" meaning of axis has changed in opset-13.
   ['Softmax', '', '1-12', softmax, parseSoftmaxAttributes],
+  ['Softmax', '', '13+', softmaxV13, parseSoftmaxAttributesV13],
   // 'Split' operator has an optional attribute 'split'
   // this attribute determines how the specified axis of input data is split.
   // When the attribute is missing, we need the count of number of outputs
diff --git a/js/web/lib/onnxjs/backends/webgl/ops/softmax.ts b/js/web/lib/onnxjs/backends/webgl/ops/softmax.ts
index 055ec55eea..a6677614b8 100644
--- a/js/web/lib/onnxjs/backends/webgl/ops/softmax.ts
+++ b/js/web/lib/onnxjs/backends/webgl/ops/softmax.ts
@@ -9,6 +9,7 @@ import {ShapeUtil} from '../../../util';
 import {getGlsl} from '../glsl-source';
 import {WebGLInferenceHandler} from '../inference-handler';
 import {ProgramInfo, TextureType} from '../types';
+import {transpose, TransposeAttributes} from './transpose';
 
 export interface SoftmaxAttributes extends AttributeWithCacheKey {
   readonly axis: number;
@@ -38,62 +39,123 @@ export const softmax: OperatorImplementation<SoftmaxAttributes> =
 
       const inputShape = inputs[0].dims.slice();
       const axis = ShapeUtil.normalizeAxis(attributes.axis, inputShape.length);
-      const N = ShapeUtil.sizeToDimension(inputShape, axis);
-      const D = ShapeUtil.sizeFromDimension(inputShape, axis);
+      const logicalRowCount = ShapeUtil.sizeToDimension(inputShape, axis);
+      const featureCount = ShapeUtil.sizeFromDimension(inputShape, axis);
 
-      const computeMaxProgramInfo = createComputeMaxProgramInfo(inferenceHandler, inputs[0], N, D, [N]);
+      const output = computeSoftmax(inferenceHandler, inputs, attributes, logicalRowCount, featureCount);
+      return output;
+    };
+
+export const parseSoftmaxAttributes: OperatorInitialization<SoftmaxAttributes> =
+    (node: Graph.Node): SoftmaxAttributes => createAttributeWithCacheKey({axis: node.attributes.getInt('axis', 1)});
+
+export const parseSoftmaxAttributesV13: OperatorInitialization<SoftmaxAttributes> =
+    (node: Graph.Node): SoftmaxAttributes => createAttributeWithCacheKey({axis: node.attributes.getInt('axis', -1)});
+
+/**
+ * Softmax for opset 13+, where softmax is computed along the single dimension selected by `axis`
+ * (earlier opsets coerce the input to 2D around `axis` instead). Compare
+ * https://github.com/onnx/onnx/blob/master/docs/Operators.md#Softmax with
+ * https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Softmax-11 for details.
+ *
+ * Strategy: move the `axis` dim to the innermost position with a transpose, run the row-wise softmax,
+ * then transpose back. Both transposes are skipped when `axis` is already the innermost dim.
+ *
+ * @param inferenceHandler WebGL inference handler used to run the transpose and softmax programs.
+ * @param inputs Operator inputs; `inputs[0]` is the tensor to normalize.
+ * @param attributes Parsed attributes; `axis` may be negative and is normalized against the input rank.
+ * @returns The tensors produced by the softmax (or by the final transpose when one was needed).
+ */
+export const softmaxV13: OperatorImplementation<SoftmaxAttributes> =
+    (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], attributes: SoftmaxAttributes): Tensor[] => {
+      validateInputs(inputs);
+
+      const inputShape = inputs[0].dims.slice();
+      const rank = inputShape.length;
+      const axis = ShapeUtil.normalizeAxis(attributes.axis, rank);
+      const innermost = rank - 1;
+
+      // Fast path: softmax is already taken over the innermost dim, so no transposes are needed.
+      if (axis === innermost) {
+        const logicalRowCount = ShapeUtil.sizeToDimension(inputShape, innermost);
+        const featureCount = ShapeUtil.sizeFromDimension(inputShape, innermost);
+        return computeSoftmax(inferenceHandler, inputs, attributes, logicalRowCount, featureCount);
+      }
+
+      // Swap `axis` with the innermost dim. A single swap is its own inverse, so the same permutation
+      // (and the same transpose attribute) restores the original dim order afterwards.
+      const perm = Array.from({length: rank}, (_, i) => i);
+      perm[axis] = innermost;
+      perm[innermost] = axis;
+      const transposeAttribute: TransposeAttributes = createAttributeWithCacheKey({perm});
+
+      // Shape after the swap; it is only used to derive the row and feature counts below.
+      const transposedInputShape = perm.map(p => inputShape[p]);
+      const transposedInputs = transpose(inferenceHandler, inputs, transposeAttribute);
+
+      const logicalRowCount = ShapeUtil.sizeToDimension(transposedInputShape, innermost);
+      const featureCount = ShapeUtil.sizeFromDimension(transposedInputShape, innermost);
+      const output = computeSoftmax(inferenceHandler, transposedInputs, attributes, logicalRowCount, featureCount);
+
+      // Undo the swap so the result is laid out in the input's original dim order.
+      return transpose(inferenceHandler, output, transposeAttribute);
+    };
+
+const computeSoftmax =
+    (inferenceHandler: WebGLInferenceHandler, inputs: Tensor[], attributes: SoftmaxAttributes, logicalRowCount: number,
+     featureCount: number): Tensor[] => {
+      const computeMaxProgramInfo =
+          createComputeMaxProgramInfo(inferenceHandler, inputs[0], logicalRowCount, featureCount, [logicalRowCount]);
       const max = inferenceHandler.run(
           {...softmaxComputeMaxProgramMetadata, cacheHint: attributes.cacheKey, get: () => computeMaxProgramInfo},
           inputs);
 
-      const computeScaleProgramInfo =
-          createComputScaleProgramInfo(inferenceHandler, inputs[0], N, D, computeMaxProgramInfo.output.dims, [N]);
+      const computeScaleProgramInfo = createComputScaleProgramInfo(
+          inferenceHandler, inputs[0], logicalRowCount, featureCount, computeMaxProgramInfo.output.dims,
+          [logicalRowCount]);
       const scale = inferenceHandler.run(
           {...softmaxComputeScaleProgramMetadata, cacheHint: attributes.cacheKey, get: () => computeScaleProgramInfo},
           [inputs[0], max]);
 
       const softMaxProgramInfo = createSoftMaxProgramInfo(
-          inferenceHandler, inputs[0], N, D, computeMaxProgramInfo.output.dims, computeScaleProgramInfo.output.dims);
+          inferenceHandler, inputs[0], logicalRowCount, featureCount, computeMaxProgramInfo.output.dims,
+          computeScaleProgramInfo.output.dims);
       const output = inferenceHandler.run(
           {...softmaxProgramMetadata, cacheHint: attributes.cacheKey, get: () => softMaxProgramInfo},
           [inputs[0], max, scale]);
       return [output];
     };
 
-export const parseSoftmaxAttributes: OperatorInitialization<SoftmaxAttributes> =
-    (node: Graph.Node): SoftmaxAttributes => createAttributeWithCacheKey({axis: node.attributes.getInt('axis', 1)});
-
 /**
  * Create a texture that contains the maximum value of each of the 'N' rows
  */
 const createComputeMaxProgramInfo =
-    // eslint-disable-next-line @typescript-eslint/naming-convention
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, N: number, D: number, outputShape: number[]):
-        ProgramInfo => {
-          const [textureWidth, textureHeight] =
-              inferenceHandler.calculateTextureWidthAndHeight(input.dims, TextureType.unpacked);
-          const rank = outputShape.length;
+    (inferenceHandler: WebGLInferenceHandler, input: Tensor, logicalRowCount: number, featureCount: number,
+     outputShape: number[]): ProgramInfo => {
+      const [textureWidth, textureHeight] =
+          inferenceHandler.calculateTextureWidthAndHeight(input.dims, TextureType.unpacked);
+      const rank = outputShape.length;
 
-          if (N < 1 || D < 1) {
-            throw new Error('Logical row count N and feature count D must be greater than or equal to 1');
-          }
+      if (logicalRowCount < 1 || featureCount < 1) {
+        throw new Error('Logical row count N and feature count D must be greater than or equal to 1');
+      }
 
-          if (outputShape.length !== 1) {
-            throw new Error('Dimensionality of the output should be 1');
-          }
+      if (outputShape.length !== 1) {
+        throw new Error('Dimensionality of the output should be 1');
+      }
 
-          if (outputShape[0] !== N) {
-            throw new Error('Shape of the output should be equal to logical row count');
-          }
+      if (outputShape[0] !== logicalRowCount) {
+        throw new Error('Shape of the output should be equal to logical row count');
+      }
 
-          const glsl = getGlsl(inferenceHandler.session.backend.glContext.version);
-          const shaderSource = `
+      const glsl = getGlsl(inferenceHandler.session.backend.glContext.version);
+      const shaderSource = `
       float process(int[${rank}] indices) {
-        int logical_row_start_offset = indices[0] * ${D};
+        int logical_row_start_offset = indices[0] * ${featureCount};
 
         float max = getColorAsFloat(${glsl.texture2D}(A, offsetToCoords(logical_row_start_offset, ${textureWidth},
         ${textureHeight} )));
-        for(int i=1; i<${D}; ++i)
+        for(int i=1; i<${featureCount}; ++i)
         {
           float current = getColorAsFloat(${glsl.texture2D}(A, offsetToCoords(logical_row_start_offset + i,
             ${textureWidth}, ${textureHeight})));
@@ -103,25 +165,24 @@ const createComputeMaxProgramInfo =
 
         return max;
       }`;
-          return {
-            ...softmaxComputeMaxProgramMetadata,
-            output: {dims: outputShape, type: input.type, textureType: TextureType.unpacked},
-            shaderSource
-          };
-        };
+      return {
+        ...softmaxComputeMaxProgramMetadata,
+        output: {dims: outputShape, type: input.type, textureType: TextureType.unpacked},
+        shaderSource
+      };
+    };
 
 /**
  * Create a texture that contains the normalization factor for each of the 'N' rows
  */
 const createComputScaleProgramInfo =
-    // eslint-disable-next-line @typescript-eslint/naming-convention
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, N: number, D: number,
+    (inferenceHandler: WebGLInferenceHandler, input: Tensor, logicalRowCount: number, featureCount: number,
      maxElementPerLogicalRow: readonly number[], outputShape: number[]): ProgramInfo => {
       const [textureWidth, textureHeight] =
           inferenceHandler.calculateTextureWidthAndHeight(input.dims, TextureType.unpacked);
       const rank = outputShape.length;
 
-      if (N < 1 || D < 1) {
+      if (logicalRowCount < 1 || featureCount < 1) {
         throw new Error('Logical row count N and feature count D must be greater than or equal to 1');
       }
 
@@ -129,7 +190,7 @@ const createComputScaleProgramInfo =
         throw new Error('Dimensionality of the output should be 1');
       }
 
-      if (outputShape[0] !== N) {
+      if (outputShape[0] !== logicalRowCount) {
         throw new Error('Shape of the output should be equal to logical row count');
       }
 
@@ -137,18 +198,18 @@ const createComputScaleProgramInfo =
         throw new Error('Dimensionality of the intermediate results should be 1');
       }
 
-      if (maxElementPerLogicalRow[0] !== N) {
+      if (maxElementPerLogicalRow[0] !== logicalRowCount) {
         throw new Error('Shape of the intermediate results should be equal to logical row count');
       }
 
       const glsl = getGlsl(inferenceHandler.session.backend.glContext.version);
       const shaderSource = `
       float process(int[${rank}] indices) {
-        int logical_row_start_offset = indices[0] * ${D};
+        int logical_row_start_offset = indices[0] * ${featureCount};
 
         float norm_factor = 0.0;
         float max = _Max(indices);
-        for(int i=0; i<${D}; ++i)
+        for(int i=0; i<${featureCount}; ++i)
         {
           norm_factor += exp(getColorAsFloat(${glsl.texture2D}(A, offsetToCoords(logical_row_start_offset + i,
             ${textureWidth}, ${textureHeight}))) - max);
@@ -164,14 +225,13 @@ const createComputScaleProgramInfo =
     };
 
 const createSoftMaxProgramInfo =
-    // eslint-disable-next-line @typescript-eslint/naming-convention
-    (inferenceHandler: WebGLInferenceHandler, input: Tensor, N: number, D: number,
+    (inferenceHandler: WebGLInferenceHandler, input: Tensor, logicalRowCount: number, featureCount: number,
      maxElementPerLogicalRow: readonly number[], normalizationPerLogicalRow: readonly number[]): ProgramInfo => {
       const [textureWidth, textureHeight] =
           inferenceHandler.calculateTextureWidthAndHeight(input.dims, TextureType.unpacked);
       const rank = input.dims.length;
 
-      if (N < 1 || D < 1) {
+      if (logicalRowCount < 1 || featureCount < 1) {
         throw new Error('Logical row count N and feature count D must be greater than or equal to 1');
       }
 
@@ -179,7 +239,7 @@ const createSoftMaxProgramInfo =
         throw new Error('Dimensionality of the intermediate results should be 1');
       }
 
-      if (maxElementPerLogicalRow[0] !== N || normalizationPerLogicalRow[0] !== N) {
+      if (maxElementPerLogicalRow[0] !== logicalRowCount || normalizationPerLogicalRow[0] !== logicalRowCount) {
         throw new Error('Shape of the intermediate results should be equal to logical row count');
       }
 
@@ -191,7 +251,7 @@ const createSoftMaxProgramInfo =
 
       //determine the logical row for this index
       int logical_row_index[1];
-      logical_row_index[0] = offset / ${D};
+      logical_row_index[0] = offset / ${featureCount};
 
       float norm_factor = _Norm(logical_row_index);
 
diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc
index c81d77850f..19d3e59274 100644
--- a/js/web/test/suite-test-list.jsonc
+++ b/js/web/test/suite-test-list.jsonc
@@ -137,13 +137,13 @@
       "test_sigmoid_example",
       "test_sin_example",
       "test_sin",
-      "v{7,8,9,10,11,12}/test_softmax_axis_0",
-      "v{7,8,9,10,11,12}/test_softmax_axis_1",
-      "v{7,8,9,10,11,12}/test_softmax_axis_2",
-      "v{7,8,9,10,11,12}/test_softmax_default_axis",
-      "v{7,8,9,10,11,12}/test_softmax_example",
+      "test_softmax_axis_0",
+      "test_softmax_axis_1",
+      "test_softmax_axis_2",
+      "test_softmax_default_axis",
+      "test_softmax_example",
       {
-        "name": "v{7,8,9,10,11,12}/test_softmax_large_number",
+        "name": "test_softmax_large_number",
         "condition": "^((?!iOS).)*$" // does NOT contains 'iOS': large number cannot be handled in a half_float environment
       },
       "test_sub_bcast",