diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md
index 61b68a777b..68bb9c06b8 100644
--- a/js/web/docs/webgpu-operators.md
+++ b/js/web/docs/webgpu-operators.md
@@ -34,6 +34,7 @@ Do not modify directly.*
 | Cos | ai.onnx(7+) | |
 | Cosh | ai.onnx(9+) | |
 | CumSum | ai.onnx(11-13,14+) | |
+| DepthToSpace | ai.onnx(11-12,13+); com.ms.internal.nhwc(11-12,13+) | |
 | Div | ai.onnx(7-12,13,14+) | |
 | Einsum | ai.onnx(12+) | |
 | Elu | ai.onnx(6+) | |
diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
index 575cf296aa..5627365100 100644
--- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
+++ b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
@@ -11,6 +11,7 @@ import {concat, parseConcatAttributes} from './ops/concat';
 import {conv, parseConvAttributes} from './ops/conv';
 import {convTranspose, parseConvTransposeAttributes} from './ops/conv-transpose';
 import {cumsum, parseCumSumAttributes} from './ops/cumsum';
+import {depthToSpace, parseDepthToSpaceAttributes} from './ops/depth-to-space';
 import {einsum, parseEinsumAttributes} from './ops/einsum';
 import {expand} from './ops/expand';
 import {fastGelu} from './ops/fast-gelu';
@@ -68,6 +69,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
   ['Cos', [unaryOps.cos]],
   ['Cosh', [unaryOps.cosh]],
   ['CumSum', [cumsum, parseCumSumAttributes]],
+  ['DepthToSpace', [depthToSpace, parseDepthToSpaceAttributes]],
   ['Div', [binaryOps.div]],
   ['Einsum', [einsum, parseEinsumAttributes]],
   ['Elu', [unaryOps.elu, unaryOps.parseAlphaAttributes]],
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/depth-to-space.ts b/js/web/lib/wasm/jsep/webgpu/ops/depth-to-space.ts
new file mode 100644
index 0000000000..83809b3d5d
--- /dev/null
+++ b/js/web/lib/wasm/jsep/webgpu/ops/depth-to-space.ts
@@ -0,0 +1,149 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {DataType} from '../../../wasm-common';
+import {TensorView} from '../../tensor-view';
+import {ShapeUtil} from '../../util';
+import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key';
+import {ComputeContext, ProgramInfo} from '../types';
+
+import {createTensorShapeVariables, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common';
+
+/** Layout of the 4-D input tensor: channels-last ('NHWC') or channels-first ('NCHW'). */
+export interface FormatAttributes {
+  readonly format: 'NHWC'|'NCHW';
+}
+
+/** Attributes of a DepthToSpace node, as produced by `parseDepthToSpaceAttributes`. */
+export interface DepthToSpaceAttributes extends FormatAttributes, AttributeWithCacheKey {
+  /** Edge length of the square block moved from the depth axis into height and width. */
+  readonly blocksize: number;
+  /** 'DCR' selects the DCR rearrangement; any other value takes the CRD code path. */
+  readonly mode: string;
+}
+
+/**
+ * Checks that exactly one 4-D input tensor was supplied.
+ *
+ * @throws Error if the number of inputs is not 1 or the input is not 4-D.
+ */
+const validateInputs = (inputs: readonly TensorView[]): void => {
+  if (!inputs || inputs.length !== 1) {
+    throw new Error('DepthToSpace requires 1 input.');
+  }
+  if (inputs[0].dims.length !== 4) {
+    throw new Error('DepthToSpace requires 4D input.');
+  }
+};
+
+/**
+ * Builds the WGSL source of the helper function `perm`, which turns an index tuple of the permuted output into an
+ * index tuple of the reshaped input. For every axis `i` the generated body stores `i[i]` at position `perm[i]` of
+ * the result through `input.indicesSet`.
+ */
+const permFunctionBody = (perm: number[], rank: number, input: IndicesHelper, output: IndicesHelper): string => {
+  const lines: string[] = [];
+  lines.push(`fn perm(i: ${output.type.indices}) -> ${input.type.indices} {
+    var a: ${input.type.indices};`);
+  for (let i = 0; i < rank; ++i) {
+    lines.push(input.indicesSet('a', perm[i], `i[${i}]`));
+  }
+  lines.push('return a;}');
+  return lines.join('\n');
+};
+
+/**
+ * Creates the GPU program for DepthToSpace. The input is viewed as a 6-D tensor that splits the depth axis into
+ * (blocksize, blocksize, depth / blocksize^2) in the order dictated by `mode` and `format`; a single shader then
+ * permutes those six axes, and the result is reported with the final 4-D output shape.
+ *
+ * @param inputTensor the 4-D input, laid out according to `attributes.format`.
+ * @param attributes blocksize, mode and format of the node.
+ * @throws Error if blocksize is not a positive integer or the input depth is not a multiple of blocksize^2.
+ */
+const createDepthToSpaceProgramInfo = (inputTensor: TensorView, attributes: DepthToSpaceAttributes): ProgramInfo => {
+  const isChannelLast = attributes.format === 'NHWC';
+  const blocksize = attributes.blocksize;
+  const isDCRmode = attributes.mode === 'DCR';
+  const [n, h, w, c] = isChannelLast ?
+      inputTensor.dims :
+      [inputTensor.dims[0], inputTensor.dims[2], inputTensor.dims[3], inputTensor.dims[1]];
+
+  // Without these checks a bad blocksize or depth would silently yield fractional (or NaN) dimensions below.
+  if (!Number.isInteger(blocksize) || blocksize <= 0) {
+    throw new Error('DepthToSpace requires blocksize to be a positive integer.');
+  }
+  const blockArea = blocksize ** 2;
+  if (c % blockArea !== 0) {
+    throw new Error('DepthToSpace requires the input depth to be divisible by blocksize squared.');
+  }
+  const outputDepth = c / blockArea;
+
+  let shape: number[];
+  let perm: number[];
+  if (isChannelLast) {
+    shape = isDCRmode ? [n, h, w, blocksize, blocksize, outputDepth] : [n, h, w, outputDepth, blocksize, blocksize];
+    perm = isDCRmode ? [0, 1, 3, 2, 4, 5] : [0, 1, 4, 2, 5, 3];
+  } else {
+    shape = isDCRmode ? [n, blocksize, blocksize, outputDepth, h, w] : [n, outputDepth, blocksize, blocksize, h, w];
+    perm = isDCRmode ? [0, 3, 4, 1, 5, 2] : [0, 1, 4, 2, 5, 3];
+  }
+  const reshapedInputTensor = inputTensor.reshape(shape);
+  const reshapedInputRank = reshapedInputTensor.dims.length;
+  const inputDataType = inputTensor.dataType;
+
+  const reshapedInput = inputVariable('a', inputDataType, reshapedInputRank);
+  const permedOutput = outputVariable('output', inputDataType, reshapedInputRank);
+
+  const getShaderSource = (shaderHelper: ShaderHelper) => `
+  ${shaderHelper.registerUniform('output_size', 'u32').declareVariables(reshapedInput, permedOutput)}
+
+  ${permFunctionBody(perm, reshapedInputRank, reshapedInput, permedOutput)}
+
+  ${shaderHelper.mainStart()}
+    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
+
+    let indices = ${permedOutput.offsetToIndices('global_idx')};
+    let aIndices = perm(indices);
+
+    ${permedOutput.setByOffset('global_idx', reshapedInput.getByIndices('aIndices'))}
+  }`;
+
+  return {
+    name: 'DepthToSpace',
+    // The generated `perm` function depends on the layout as well as on the mode, so `format` must be part of the
+    // cache key; otherwise an NHWC and an NCHW node with identical dims, blocksize and mode would share one shader.
+    shaderCache: {
+      hint: `${inputTensor.dims};${attributes.blocksize};${attributes.mode};${attributes.format}`,
+      inputDependencies: ['rank']
+    },
+    getRunData: (inputs) => {
+      const outputShape = isChannelLast ? [n, h * blocksize, w * blocksize, outputDepth] :
+                                          [n, outputDepth, h * blocksize, w * blocksize];
+      const outputSize = ShapeUtil.size(outputShape);
+      const shapeBeforePerm = reshapedInputTensor.dims;
+      const shapeAfterPerm = ShapeUtil.sortBasedOnPerm(shapeBeforePerm, perm);
+      return {
+        outputs: [{dims: outputShape, dataType: inputs[0].dataType}],
+        dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)},
+        programUniforms:
+            [{type: DataType.uint32, data: outputSize}, ...createTensorShapeVariables(shapeBeforePerm, shapeAfterPerm)],
+      };
+    },
+    getShaderSource,
+  };
+};
+
+/** Validates the inputs of the node and dispatches the DepthToSpace program on its single input. */
+export const depthToSpace = (context: ComputeContext, attributes: DepthToSpaceAttributes): void => {
+  validateInputs(context.inputs);
+  context.compute(createDepthToSpaceProgramInfo(context.inputs[0], attributes));
+};
+
+/** Converts the raw attribute bag (blocksize, mode, format) into typed, cache-keyed attributes. */
+export const parseDepthToSpaceAttributes = (attributes: Record<string, unknown>): DepthToSpaceAttributes =>
+    createAttributeWithCacheKey({
+      blocksize: attributes.blocksize as number,
+      mode: attributes.mode as string,
+      format: attributes.format as 'NHWC' | 'NCHW'
+    });
diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc
index 3a4eac7890..78b1b6d49a 100644
--- a/js/web/test/suite-test-list.jsonc
+++ b/js/web/test/suite-test-list.jsonc
@@ -472,11 +472,11 @@
       // "test_cumsum_2d_axis_0",
       // "test_cumsum_2d_axis_1",
       // "test_cumsum_2d_negative_axis",
-      // "test_depthtospace_crd_mode_example",
-      // "test_depthtospace_crd_mode",
-      // "test_depthtospace_dcr_mode",
-      // "test_depthtospace_example",
-      // "test_depthtospace",
+      "test_depthtospace_crd_mode_example",
+      "test_depthtospace_crd_mode",
+      "test_depthtospace_dcr_mode",
+      "test_depthtospace_example",
+      "test_depthtospace",
       // // "test_dequantizelinear_axis",
       // // "test_dequantizelinear",
       // // "test_det_2d",
@@ -1350,7 +1350,7 @@
       "cos.jsonc",
       "div.jsonc",
       "div_int32.jsonc",
-      //"depth-to-space.jsonc",
+      "depth-to-space.jsonc",
       "equal.jsonc",
       "exp.jsonc",
       "expand.jsonc",
diff --git 
a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc
index 038423104d..0ad62b87d3 100644
--- a/onnxruntime/core/providers/js/js_execution_provider.cc
+++ b/onnxruntime/core/providers/js/js_execution_provider.cc
@@ -239,6 +239,11 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 16, Whe
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, Transpose);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Transpose);
 
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, DepthToSpace);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, DepthToSpace);
+class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 12, DepthToSpace);
+class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 13, DepthToSpace);
+
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, Conv);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Conv);
 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, Conv);
@@ -534,6 +539,11 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 12, Transpose)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Transpose)>,
 
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, DepthToSpace)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, DepthToSpace)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 11, 12, DepthToSpace)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 13, DepthToSpace)>,
+
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 10, Conv)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, Conv)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kMSInternalNHWCDomain, 1, 10, Conv)>,
diff --git a/onnxruntime/core/providers/js/operators/depth_to_space.cc b/onnxruntime/core/providers/js/operators/depth_to_space.cc
new file mode 100644
index 0000000000..4833859bd2
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/depth_to_space.cc
@@ -0,0 +1,48 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "depth_to_space.h"
+
+namespace onnxruntime {
+namespace js {
+
+// Registrations in kMSInternalNHWCDomain instantiate DepthToSpace<true> (format reported as "NHWC");
+// registrations in kOnnxDomain instantiate DepthToSpace<false> (format reported as "NCHW").
+ONNX_OPERATOR_KERNEL_EX(
+    DepthToSpace,
+    kMSInternalNHWCDomain,
+    13,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", JsepSupportedDataTypes()),
+    DepthToSpace<true>);
+
+ONNX_OPERATOR_KERNEL_EX(
+    DepthToSpace,
+    kOnnxDomain,
+    13,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", JsepSupportedDataTypes()),
+    DepthToSpace<false>);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    DepthToSpace,
+    kMSInternalNHWCDomain,
+    11, 12,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", JsepSupportedDataTypes()),
+    DepthToSpace<true>);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    DepthToSpace,
+    kOnnxDomain,
+    11, 12,
+    kJsExecutionProvider,
+    KernelDefBuilder()
+        .TypeConstraint("T", JsepSupportedDataTypes()),
+    DepthToSpace<false>);
+
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/js/operators/depth_to_space.h b/onnxruntime/core/providers/js/operators/depth_to_space.h
new file mode 100644
index 0000000000..b43ce927b1
--- /dev/null
+++ b/onnxruntime/core/providers/js/operators/depth_to_space.h
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+#include <cstdint>
+#include <string>
+
+namespace onnxruntime {
+namespace js {
+
+// JSEP kernel for DepthToSpace. The constructor reads the node attributes once and forwards them to the
+// JavaScript side; `is_channels_last` selects whether "format" is reported as "NHWC" or "NCHW".
+template <bool is_channels_last>
+class DepthToSpace final : public JsKernel {
+ public:
+  explicit DepthToSpace(const OpKernelInfo& info) : JsKernel(info) {
+    int64_t blocksize;
+    ORT_ENFORCE(info.GetAttr<int64_t>("blocksize", &blocksize).IsOK(), "Attribute blocksize is not set.");
+    // blocksize is narrowed to int32_t below, so reject values that are not usable block sizes up front.
+    ORT_ENFORCE(blocksize > 0, "Attribute blocksize must be positive, got: ", blocksize);
+
+    // "mode" is optional and defaults to "DCR".
+    const std::string mode = info.GetAttrOrDefault<std::string>("mode", "DCR");
+    if (mode != "DCR" && mode != "CRD") {
+      ORT_THROW("Invalid mode attribute value: ", mode);
+    }
+
+    JSEP_INIT_KERNEL_ATTRIBUTE(DepthToSpace, ({
+                                 "blocksize" : $1,
+                                 "mode" : UTF8ToString($2),
+                                 "format" : $3 ? "NHWC" : "NCHW"
+                               }),
+                               static_cast<int32_t>(blocksize),
+                               mode.c_str(),
+                               static_cast<int32_t>(is_channels_last));
+  }
+};
+
+}  // namespace js
+}  // namespace onnxruntime