diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md index de53f943bc..71d98f5d73 100644 --- a/js/web/docs/webgpu-operators.md +++ b/js/web/docs/webgpu-operators.md @@ -59,6 +59,7 @@ Do not modify directly.* | Mul | ai.onnx(7-12,13,14+) | | | Neg | ai.onnx(6-12,13+) | | | Not | ai.onnx(1+) | | +| Pad | ai.onnx(2-10,11-12,13-17,18,19+) | | | Pow | ai.onnx(7-11,12,13-14,15+) | | | Reciprocal | ai.onnx(6-12,13+) | | | ReduceL1 | ai.onnx(1-10,11-12,13-17,18+) | | diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts index 9c46b97694..e92e6696d9 100644 --- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts +++ b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts @@ -14,6 +14,7 @@ import {gemm, parseGemmAttributes} from './ops/gemm'; import {instanceNorm, parseInstanceNormAttributes} from './ops/instance-norm'; import {layerNorm, parseLayerNormAttributes} from './ops/layer-norm'; import {matMul} from './ops/matmul'; +import {pad, parsePadAttributes} from './ops/pad'; import * as pool from './ops/pool'; import {parseReduceAttributes, reduceL1, reduceL2, reduceLogSum, reduceLogSumExp, reduceMax, reduceMean, reduceMin, reduceProd, reduceSum, reduceSumSquare} from './ops/reduce'; import {parseResizeAttributes, resize} from './ops/resize'; @@ -80,6 +81,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map = new ['Mul', [binaryOps.mul]], ['Neg', [unaryOps.neg]], ['Not', [unaryOps.not]], + ['Pad', [pad, parsePadAttributes]], ['Pow', [binaryOps.pow]], ['Reciprocal', [unaryOps.reciprocal]], ['ReduceMin', [reduceMin, parseReduceAttributes]], diff --git a/js/web/lib/wasm/jsep/webgpu/ops/pad.ts b/js/web/lib/wasm/jsep/webgpu/ops/pad.ts new file mode 100644 index 0000000000..d90296b5c5 --- /dev/null +++ b/js/web/lib/wasm/jsep/webgpu/ops/pad.ts @@ -0,0 +1,252 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import {DataType} from '../../../wasm-common'; +import {TensorView} from '../../tensor'; +import {ShapeUtil} from '../../util'; +import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; +import {ComputeContext, GpuDataType, ProgramInfo, ProgramInfoLoader, ProgramMetadata} from '../types'; + +import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; + +export interface PadAttributes extends AttributeWithCacheKey { + // 0-constant, 1-reflect, 2-edge, 3-wrap + readonly mode: number; + readonly value: number; + readonly pads: number[]; +} + +const validateInputs = (inputs: readonly TensorView[]): void => { + if (!inputs || inputs.length < 1) { + throw new Error('Too few inputs'); + } + if (inputs[0].dataType !== DataType.float) { + throw new Error('Input type must be float.'); + } + + if (inputs.length >= 2) { + let validPads = inputs[0].dims.length * 2 === inputs[1].dims[0]; + if (inputs.length === 4) { + validPads = inputs[3].dims[0] * 2 === inputs[1].dims[0]; + } + if (!validPads) { + throw new Error('The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].'); + } + } +}; + +const getPadConstant = + (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[], + inputStrides: readonly number[], pads: number[], dataType: string, constantValue: number): string => { + const inputRank = inputDims.length; + + let block = ''; + for (let i = inputRank - 1; i >= 0; --i) { + block += ` + k = i32(${output.indicesGet('indices', i)}) - ${pads[i]}; + if (k < 0) { + break; + } + if (k >= ${inputDims[i]}) { + break; + } + offset += k * ${inputStrides[i]}; + `; + } + + return ` + value = ${dataType}(${constantValue}); + for (var i = 0; i < 1; i++) { + var offset = 0; + var k = 0; + ${block} + value = x[offset]; + } + `; + }; + +const getPadReflect = + (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[], + inputStrides: readonly number[], pads: number[]): string => { + const inputRank = inputDims.length; + + let block = ''; + for (let i = inputRank - 1; i >= 0; --i) { + block += ` + k = i32(${output.indicesGet('indices', i)}) - ${pads[i]}; + if (k < 0) { + k = -k; + } + { + let _2n_1 = ${2 * (inputDims[i] - 1)}; + k = k % _2n_1; + if(k >= ${inputDims[i]}) { + k = _2n_1 - k; + } + } + offset += k * ${inputStrides[i]}; + `; + } + + return ` + var offset = 0; + var k = 0; + ${block} + value = x[offset]; + `; + }; + +const getPadEdge = + (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[], + inputStrides: readonly number[], pads: number[]): string => { + const inputRank = inputDims.length; + + let block = ''; + for (let i = inputRank - 1; i >= 0; --i) { + block += ` + k = i32(${output.indicesGet('indices', i)}) - ${pads[i]}; + if (k < 0) { + k = 0; + } + if (k >= ${inputDims[i]}) { + k = ${inputDims[i] - 1}; + } + offset += k * ${inputStrides[i]}; + `; + } + + return ` + var offset = 0; + var k = 0; + ${block} + value = x[offset]; + `; + }; + +const getPadWrap = + (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[], + inputStrides: readonly number[], pads: number[]): string => { + const inputRank = inputDims.length; + + let block = ''; + for (let i = inputRank - 1; i >= 0; --i) { + block += ` + k = i32(${output.indicesGet('indices', i)}) - ${pads[i]}; + if (k < 0) { + k += ${inputDims[i]}; + } + if (k >= ${inputDims[i]}) { + k -= ${inputDims[i]}; + } + offset += k * ${inputStrides[i]}; + `; + } + + return ` + var offset = 0; + var k = 0; + ${block} + value = x[offset]; + `; + }; + +const getPadSnippet = + (output: IndicesHelper, outputDims: readonly number[], inputDims: readonly number[], + inputStrides: readonly number[], attributes: PadAttributes, dataType: string): string => { + switch (attributes.mode) { + case 0: + return getPadConstant( + output, outputDims, inputDims, inputStrides, attributes.pads, dataType, attributes.value); + case 1: + return getPadReflect(output, outputDims, inputDims, inputStrides, attributes.pads); + case 2: + return getPadEdge(output, outputDims, inputDims, inputStrides, attributes.pads); + case 3: + return getPadWrap(output, outputDims, inputDims, inputStrides, attributes.pads); + default: + throw new Error('Invalid mode'); + } + }; + +const generatePadCode = + (shaderHelper: ShaderHelper, inputs: readonly TensorView[], attributes: PadAttributes, dataType: string): + string => { + const inputDims = inputs[0].dims; + const outputDims = ShapeUtil.padShape(inputDims.slice(), attributes.pads); + const outputSize = ShapeUtil.size(outputDims); + const inputStrides = ShapeUtil.computeStrides(inputDims); + + const output = outputVariable('output', inputs[0].dataType, outputDims); + const input = inputVariable('x', inputs[0].dataType, inputDims); + + const padSnippet = getPadSnippet(output, outputDims, inputDims, inputStrides, attributes, dataType); + const padCode = ` + ${shaderHelper.declareVariables(input, output)} + ${output.impl()} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)} + + let indices = ${output.offsetToIndices('global_idx')}; + + var value = ${dataType}(0); + ${padSnippet} + output[global_idx] = value; + }`; + return padCode; + }; + +const createPadProgramInfo = + (inputs: readonly TensorView[], metadata: ProgramMetadata, attributes: PadAttributes): ProgramInfo => { + const outputShape = ShapeUtil.padShape(inputs[0].dims.slice(), attributes.pads); + return { + ...metadata, + outputs: [{dims: outputShape, dataType: inputs[0].dataType, gpuDataType: GpuDataType.default}], + getShaderSource: shaderHelper => generatePadCode(shaderHelper, inputs, attributes, 'f32'), + dispatchGroup: () => ({x: Math.ceil(ShapeUtil.size(outputShape) / 64 /* workgroup size */)}) + }; + }; + +const createPadAttributesFromInputs = (inputs: readonly TensorView[], attributes: PadAttributes): PadAttributes => { + if (inputs.length > 1) { + const bigInt64Pads = inputs[1].getBigInt64Array(); + const value = (inputs.length >= 3) ? inputs[2].getFloat32Array()[0] : 0.0; + + const inputRank = inputs[0].dims.length; + const updatePads = new Int32Array(2 * inputRank).fill(0); + if (inputs.length >= 4) { + const axes = inputs[3].getBigInt64Array(); + for (let i = 0; i < axes.length; i++) { + updatePads[Number(axes[i])] = Number(bigInt64Pads[i]); + updatePads[Number(axes[i]) + inputRank] = Number(bigInt64Pads[i + axes.length]); + } + } else { + bigInt64Pads.forEach((i, v) => updatePads[Number(i)] = (Number(v))); + } + + const pads: number[] = []; + updatePads.forEach(v => pads.push(v)); + + return createAttributeWithCacheKey({mode: attributes.mode, value, pads}); + } else { + return attributes; + } +}; + +const createPadProgramInfoLoader = (inputs: readonly TensorView[], attributes: PadAttributes): ProgramInfoLoader => { + const updatedAttributes = createPadAttributesFromInputs(inputs, attributes); + const metadata: + ProgramMetadata = {name: 'Pad', inputTypes: [GpuDataType.default], cacheHint: updatedAttributes.cacheKey}; + return {...metadata, get: () => createPadProgramInfo(inputs, metadata, updatedAttributes)}; +}; + +export const pad = (context: ComputeContext, attributes: PadAttributes): void => { + validateInputs(context.inputs); + context.compute(createPadProgramInfoLoader(context.inputs, attributes), {inputs: [0]}); +}; + +export const parsePadAttributes = (attributes: Record): PadAttributes => { + const mode = attributes.mode as number; + const value = attributes.value as number; + const pads = attributes.pads as number[]; + return createAttributeWithCacheKey({mode, value, pads}); +}; diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc index f4249b2410..e580259071 100644 --- a/js/web/test/suite-test-list.jsonc +++ b/js/web/test/suite-test-list.jsonc @@ -505,7 +505,7 @@ // // "test_dynamicquantizelinear_min_adjusted_expanded", // // "test_dynamicquantizelinear_min_adjusted", // // "test_dynamicquantizelinear", - // // "test_edge_pad", + "test_edge_pad", // "test_einsum_batch_diagonal", // "test_einsum_batch_matmul", // "test_einsum_inner_prod", @@ -965,7 +965,7 @@ "test_reduce_sum_square_keepdims_random", "test_reduce_sum_square_negative_axes_keepdims_example", "test_reduce_sum_square_negative_axes_keepdims_random", - // // "test_reflect_pad", + "test_reflect_pad", "test_relu", // "test_reshape_allowzero_reordered", "test_reshape_extended_dims", @@ -1308,7 +1308,8 @@ "test_unsqueeze_three_axes", "test_unsqueeze_two_axes", "test_unsqueeze_unsorted_axes", - "test_unsqueeze" + "test_unsqueeze", + "test_wrap_pad" // "test_upsample_nearest", // "test_where_example", // "test_where_long_example", @@ -1361,8 +1362,8 @@ "reduce-min.jsonc", "relu.jsonc", "gelu.jsonc", - //"pad.jsonc", - //"pad-big.jsonc", + "pad.jsonc", + "pad-big.jsonc", "pow.jsonc", "pow_int32.jsonc", "pow-big-number.jsonc", diff --git a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc index 6b548921cd..9dccd7c47f 100644 --- a/onnxruntime/core/providers/js/js_execution_provider.cc +++ b/onnxruntime/core/providers/js/js_execution_provider.cc @@ -321,6 +321,12 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 6 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, float, Einsum); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 2, 10, Pad); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Pad); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, 17, Pad); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 18, 18, Pad); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 19, Pad); + std::unique_ptr RegisterKernels() { auto kernel_registry = std::make_unique(); @@ -577,6 +583,12 @@ std::unique_ptr RegisterKernels() { BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/core/providers/js/operators/pad.cc b/onnxruntime/core/providers/js/operators/pad.cc new file mode 100644 index 0000000000..24ba85cbf6 --- /dev/null +++ b/onnxruntime/core/providers/js/operators/pad.cc @@ -0,0 +1,72 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/js/js_kernel.h" + +#include "pad.h" + +namespace onnxruntime { +namespace js { + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Pad, + kOnnxDomain, + 2, + 10, + kJsExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType()), + Pad); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Pad, + kOnnxDomain, + 11, + 12, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3), + Pad); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Pad, + kOnnxDomain, + 13, + 17, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3), + Pad); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Pad, + kOnnxDomain, + 18, + 18, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3), + Pad); + +ONNX_OPERATOR_KERNEL_EX( + Pad, + kOnnxDomain, + 19, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .InputMemoryType(OrtMemTypeCPU, 1) + .InputMemoryType(OrtMemTypeCPU, 2) + .InputMemoryType(OrtMemTypeCPU, 3), + Pad); + +} // namespace js +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/js/operators/pad.h b/onnxruntime/core/providers/js/operators/pad.h new file mode 100644 index 0000000000..19168f40b4 --- /dev/null +++ b/onnxruntime/core/providers/js/operators/pad.h @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/providers/js/js_kernel.h" +#include "core/providers/cpu/tensor/padbase.h" + +namespace onnxruntime { +namespace js { + +class Pad : public JsKernel, public PadBase { + public: + explicit Pad(const OpKernelInfo& info) : JsKernel(info), PadBase(info) { + std::vector pads; + if (!is_dynamic_) { + pads.resize(pads_.size()); + for (size_t i = 0; i < pads_.size(); ++i) { + pads[i] = gsl::narrow_cast(pads_[i]); + } + } + + JSEP_INIT_KERNEL_ATTRIBUTE(Pad, ({"mode" : $1, + "value" : $2, + "pads" : $3 ? Array.from(HEAP32.subarray($4, $4 + $3)) : []}), + static_cast(mode_), + static_cast(value_), + gsl::narrow_cast(pads.size()), + reinterpret_cast((pads.size() > 0) ? pads.data() : nullptr) >> 2); + } +}; + +} // namespace js +} // namespace onnxruntime