mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-12 00:59:23 +00:00
[js/webgpu] allows a ProgramInfo's RunData to use zero sized output (#19614)
### Description This PR allows zero-sized outputs. To keep the implementation simple, it does not support partially zero-sized outputs: either all outputs are zero-sized, or an error is reported. Added 2 tests: - an op test of `Add` with inputs T[2,0] T[2,1], and - test_split_zero_size_splits
This commit is contained in:
parent
efbe2b8455
commit
aec2389ad0
6 changed files with 72 additions and 10 deletions
|
|
@ -385,11 +385,16 @@ export class WebGpuBackend {
|
|||
// create info for inputs
|
||||
const inputDatas: GpuData[] = [];
|
||||
for (let i = 0; i < inputTensorViews.length; ++i) {
|
||||
const gpuData = this.gpuDataManager.get(inputTensorViews[i].data);
|
||||
if (!gpuData) {
|
||||
throw new Error(`no GPU data for input: ${inputTensorViews[i].data}`);
|
||||
const data = inputTensorViews[i].data;
|
||||
// if tensor view data is 0, it means the input is a zero-sized tensor, and there is no GPU data for it.
|
||||
if (data === 0) {
|
||||
continue;
|
||||
}
|
||||
inputDatas[i] = gpuData;
|
||||
const gpuData = this.gpuDataManager.get(data);
|
||||
if (!gpuData) {
|
||||
throw new Error(`no GPU data for input: ${data}`);
|
||||
}
|
||||
inputDatas.push(gpuData);
|
||||
}
|
||||
|
||||
const {outputs, dispatchGroup, programUniforms} = program.getRunData(inputTensorViews);
|
||||
|
|
@ -419,6 +424,11 @@ export class WebGpuBackend {
|
|||
const tensorView = (isTemporary || isPersistent) ?
|
||||
createIntermediateOutput(outputs[i].dataType, outputs[i].dims) :
|
||||
createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims);
|
||||
outputTensorViews.push(tensorView);
|
||||
// if tensor view data is 0, it means the output is a zero-sized tensor, and there is no GPU data for it.
|
||||
if (tensorView.data === 0) {
|
||||
continue;
|
||||
}
|
||||
const gpuData = this.gpuDataManager.get(tensorView.data);
|
||||
if (!gpuData) {
|
||||
throw new Error(`no GPU data for output: ${tensorView.data}`);
|
||||
|
|
@ -434,10 +444,24 @@ export class WebGpuBackend {
|
|||
}
|
||||
persistentData.push(gpuData);
|
||||
}
|
||||
outputTensorViews.push(tensorView);
|
||||
outputDatas.push(gpuData);
|
||||
}
|
||||
|
||||
// when there are any zero-sized tensors in the inputs or outputs, we should report an error unless all outputs are
|
||||
// zero-sized tensors.
|
||||
if (inputDatas.length !== inputTensorViews.length || outputDatas.length !== outputTensorViews.length) {
|
||||
// if all outputs are zero-sized tensors, there is no need to run the program.
|
||||
if (outputDatas.length === 0) {
|
||||
TRACE_FUNC_END(program.name);
|
||||
return outputTensorViews;
|
||||
}
|
||||
// if some outputs are zero-sized tensors, report an error.
|
||||
//
|
||||
// TODO: so far we don't see any use case where the outputs include both zero-sized tensors and non-zero-sized tensors.
|
||||
// If we see such a use case, we need to make a change here to support it.
|
||||
throw new Error(
|
||||
`Program ${program.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`);
|
||||
}
|
||||
|
||||
// load uniforms
|
||||
// TODO: add cache for uniform (is it necessary?)
|
||||
|
|
|
|||
|
|
@ -104,7 +104,8 @@ class ComputeContextImpl implements ComputeContext {
|
|||
throw new Error(`Unsupported data type: ${dataType}`);
|
||||
}
|
||||
const bufferSize = elementSize * ShapeUtil.size(dims);
|
||||
return new TensorViewImpl(this.module, dataType, this.backend.gpuDataManager.create(bufferSize).id, dims);
|
||||
const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;
|
||||
return new TensorViewImpl(this.module, dataType, gpuDataId, dims);
|
||||
};
|
||||
return this.backend.run(program, mappedInputs, outputIndices, createKernelOutput, createTemporaryOutput);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,16 @@ export class BroadcastUtil {
|
|||
if (aLen !== bLen && aLen > 1 && bLen > 1) {
|
||||
return undefined;
|
||||
}
|
||||
cdims[crank - i] = Math.max(aLen, bLen);
|
||||
const max = Math.max(aLen, bLen);
|
||||
if (aLen && bLen) {
|
||||
cdims[crank - i] = Math.max(aLen, bLen);
|
||||
} else {
|
||||
// when either aLen or bLen is 0, the other should be either 0 or 1, otherwise it is not broadcastable.
|
||||
if (max > 1) {
|
||||
return undefined;
|
||||
}
|
||||
cdims[crank - i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return cdims;
|
||||
|
|
|
|||
|
|
@ -157,6 +157,28 @@
|
|||
"type": "float32"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "T[2,0] T[2,1]",
|
||||
"inputs": [
|
||||
{
|
||||
"data": [],
|
||||
"dims": [2, 0],
|
||||
"type": "float32"
|
||||
},
|
||||
{
|
||||
"data": [1, 2],
|
||||
"dims": [2, 1],
|
||||
"type": "float32"
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"data": [],
|
||||
"dims": [2, 0],
|
||||
"type": "float32"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1231,7 +1231,7 @@
|
|||
"test_split_variable_parts_1d",
|
||||
"test_split_variable_parts_2d",
|
||||
"test_split_variable_parts_default_axis",
|
||||
// // "test_split_zero_size_splits",
|
||||
"test_split_zero_size_splits",
|
||||
"test_sqrt_example",
|
||||
"test_sqrt",
|
||||
"test_squeeze_negative_axes",
|
||||
|
|
|
|||
|
|
@ -573,7 +573,9 @@ export async function sessionRun(options: {
|
|||
// replace the CPU tensors in feeds into GPU tensors
|
||||
for (const name in feeds) {
|
||||
if (Object.hasOwnProperty.call(feeds, name)) {
|
||||
feeds[name] = createGpuTensorForInput(feeds[name]);
|
||||
if (feeds[name].size > 0) {
|
||||
feeds[name] = createGpuTensorForInput(feeds[name]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -582,7 +584,11 @@ export async function sessionRun(options: {
|
|||
for (const name in options.outputsMetaInfo) {
|
||||
if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
|
||||
const {type, dims} = options.outputsMetaInfo[name];
|
||||
fetches[name] = createGpuTensorForOutput(type, dims);
|
||||
if (dims.some(d => d === 0)) {
|
||||
fetches[name] = new ort.Tensor(type, [], dims);
|
||||
} else {
|
||||
fetches[name] = createGpuTensorForOutput(type, dims);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue