[js/webgpu] allows a ProgramInfo's RunData to use zero sized output (#19614)

### Description This PR allows zero-sized outputs. To keep the implementation simple, it does not support partially zero-sized outputs; that is, either all outputs are zero-sized, or an error is reported. Added 2 tests: - op test of `Add` with input T[2,0] T[2,1], and - test_split_zero_size_splits
2026-07-28 20:11:22 +00:00 · 2024-02-23 12:52:47 -08:00 · 2024-02-23 12:52:47 -08:00 · aec2389ad0
commit aec2389ad0
parent efbe2b8455
6 changed files with 72 additions and 10 deletions
--- a/js/web/lib/wasm/jsep/backend-webgpu.ts
+++ b/js/web/lib/wasm/jsep/backend-webgpu.ts
@ -385,11 +385,16 @@ export class WebGpuBackend {
    // create info for inputs
    const inputDatas: GpuData[] = [];
    for (let i = 0; i < inputTensorViews.length; ++i) {
-      const gpuData = this.gpuDataManager.get(inputTensorViews[i].data);
-      if (!gpuData) {
-        throw new Error(`no GPU data for input: ${inputTensorViews[i].data}`);
+      const data = inputTensorViews[i].data;
+      // if tensor view data is 0, it means the input is a zero-sized tensor, and there is no GPU data for it.
+      if (data === 0) {
+        continue;
      }
-      inputDatas[i] = gpuData;
+      const gpuData = this.gpuDataManager.get(data);
+      if (!gpuData) {
+        throw new Error(`no GPU data for input: ${data}`);
+      }
+      inputDatas.push(gpuData);
    }

    const {outputs, dispatchGroup, programUniforms} = program.getRunData(inputTensorViews);
@ -419,6 +424,11 @@ export class WebGpuBackend {
      const tensorView = (isTemporary || isPersistent) ?
          createIntermediateOutput(outputs[i].dataType, outputs[i].dims) :
          createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims);
+      outputTensorViews.push(tensorView);
+      // if tensor view data is 0, it means the output is zero-sized tensor, and there is no GPU data for it.
+      if (tensorView.data === 0) {
+        continue;
+      }
      const gpuData = this.gpuDataManager.get(tensorView.data);
      if (!gpuData) {
        throw new Error(`no GPU data for output: ${tensorView.data}`);
@ -434,10 +444,24 @@ export class WebGpuBackend {
        }
        persistentData.push(gpuData);
      }
-      outputTensorViews.push(tensorView);
      outputDatas.push(gpuData);
    }

+    // when there are any zero-sized tensor in the inputs or outputs, we should report error unless all outputs are
+    // zero-sized tensors.
+    if (inputDatas.length !== inputTensorViews.length || outputDatas.length !== outputTensorViews.length) {
+      // if all outputs are zero-sized tensors, there is no need to run the program.
+      if (outputDatas.length === 0) {
+        TRACE_FUNC_END(program.name);
+        return outputTensorViews;
+      }
+      // if some outputs are zero-sized tensors, report an error.
+      //
+      // TODO: so far we don't see any use case that outputs include both zero-sized tensors and non-zero-sized tensors.
+      // If we see such use case, we need to make a change here to support it.
+      throw new Error(
+          `Program ${program.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`);
+    }

    // load uniforms
    // TODO: add cache for uniform (is it necessary?)
--- a/js/web/lib/wasm/jsep/init.ts
+++ b/js/web/lib/wasm/jsep/init.ts
@ -104,7 +104,8 @@ class ComputeContextImpl implements ComputeContext {
        throw new Error(`Unsupported data type: ${dataType}`);
      }
      const bufferSize = elementSize * ShapeUtil.size(dims);
-      return new TensorViewImpl(this.module, dataType, this.backend.gpuDataManager.create(bufferSize).id, dims);
+      const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;
+      return new TensorViewImpl(this.module, dataType, gpuDataId, dims);
    };
    return this.backend.run(program, mappedInputs, outputIndices, createKernelOutput, createTemporaryOutput);
  }
--- a/js/web/lib/wasm/jsep/util.ts
+++ b/js/web/lib/wasm/jsep/util.ts
@ -56,7 +56,16 @@ export class BroadcastUtil {
      if (aLen !== bLen && aLen > 1 && bLen > 1) {
        return undefined;
      }
-      cdims[crank - i] = Math.max(aLen, bLen);
+      const max = Math.max(aLen, bLen);
+      if (aLen && bLen) {
+        cdims[crank - i] = Math.max(aLen, bLen);
+      } else {
+        // when either aLen or bLen is 0, the other should be either 0 or 1, otherwise it is not broadcastable.
+        if (max > 1) {
+          return undefined;
+        }
+        cdims[crank - i] = 0;
+      }
    }

    return cdims;
--- a/js/web/test/data/ops/add.jsonc
+++ b/js/web/test/data/ops/add.jsonc
@ -157,6 +157,28 @@
            "type": "float32"
          }
        ]
+      },
+      {
+        "name": "T[2,0] T[2,1]",
+        "inputs": [
+          {
+            "data": [],
+            "dims": [2, 0],
+            "type": "float32"
+          },
+          {
+            "data": [1, 2],
+            "dims": [2, 1],
+            "type": "float32"
+          }
+        ],
+        "outputs": [
+          {
+            "data": [],
+            "dims": [2, 0],
+            "type": "float32"
+          }
+        ]
      }
    ]
  }
--- a/js/web/test/suite-test-list.jsonc
+++ b/js/web/test/suite-test-list.jsonc
@ -1231,7 +1231,7 @@
      "test_split_variable_parts_1d",
      "test_split_variable_parts_2d",
      "test_split_variable_parts_default_axis",
-      // // "test_split_zero_size_splits",
+      "test_split_zero_size_splits",
      "test_sqrt_example",
      "test_sqrt",
      "test_squeeze_negative_axes",
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@ -573,7 +573,9 @@ export async function sessionRun(options: {
      // replace the CPU tensors in feeds into GPU tensors
      for (const name in feeds) {
        if (Object.hasOwnProperty.call(feeds, name)) {
-          feeds[name] = createGpuTensorForInput(feeds[name]);
+          if (feeds[name].size > 0) {
+            feeds[name] = createGpuTensorForInput(feeds[name]);
+          }
        }
      }
    }
@ -582,7 +584,11 @@ export async function sessionRun(options: {
      for (const name in options.outputsMetaInfo) {
        if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
          const {type, dims} = options.outputsMetaInfo[name];
-          fetches[name] = createGpuTensorForOutput(type, dims);
+          if (dims.some(d => d === 0)) {
+            fetches[name] = new ort.Tensor(type, [], dims);
+          } else {
+            fetches[name] = createGpuTensorForOutput(type, dims);
+          }
        }
      }
    }