onnxruntime/js/web/lib/wasm/wasm-common.ts
Yulong Wang 561aca97cf
[js/webgpu] support IO binding (#17480)
<del>
**This PR is based on a few prerequisites PRs. They are listed as
below:**
- #17465
- #17469
- #17470
- #17472
- #17473
- #17484

Please review the current change by only looking at commit
e2e6623e673ec6de55a5c1f8edcbd3a46b535a89 and later.


</del>

### Description

This PR introduces WebGPU IO binding. This new feature allows
onnxruntime-web users to use tensors created from GPU as model
input/output, so that model inference can be done without unnecessary
data copies between CPU and GPU for model input/output.

### Examples

An E2E demo/example is being worked on.

The following is a simple demo with code snippets.

Let's first look at how this is done today:
```js
// STEP.1 - create an inference session:
const mySession = await ort.InferenceSession.create('./my_model.onnx', { executionProviders: ['webgpu'] });

// STEP.2 - create model input: (supposing myImageCpuData is a Float32Array)
const feeds = {
  'input_image:0': new ort.Tensor('float32', myImageCpuData, [1, 224, 224, 3])
};

// STEP.3 - run model
const myResults = await mySession.run(feeds);

// STEP.4 - get output data
const myData = myResults['output_image:0'].data; // Float32Array

```

#### for inputs (GPU tensor):

Now, with IO binding, you can create a tensor from a GPU buffer, and
feed it to the model:
```js
// new STEP.2.A - create model input from a GPU buffer: (supposing myInputGpuBuffer is a `GPUBuffer` object with input data)
const feeds = {
  'input_image:0': ort.Tensor.fromGpuBuffer(myInputGpuBuffer, { dataType: 'float32', dims: [1, 224, 224, 3] })
};
```

### for outputs (pre-allocated GPU tensor)

you can also do that for output, **if you know the output shape**:
```js
// new STEP.2.B - create model output from a GPU buffer: (supposing myOutputGpuBuffer is a pre-allocated `GPUBuffer` object)
const fetches = {
  'output_image:0': ort.Tensor.fromGpuBuffer(myOutputGpuBuffer, { dataType: 'float32', dims: [1, 512, 512, 3] })
};

// new STEP.3 - run model with pre-allocated output (fetches)
const myResults = await mySession.run(feeds, fetches);
```

### for outputs (specify location)

if you do not know the output shape, you can specify the output location
when creating the session:

```js
// new STEP.1 - create an inference session with an option "preferredOutputLocation":
const mySession = await ort.InferenceSession.create('./my_model.onnx', {
    executionProviders: ['webgpu'],
    preferredOutputLocation: "gpu-buffer"
});
```

if the model has multiple outputs, you can specify them separately:
```js
// new STEP.1 - create an inference session with an option "preferredOutputLocation":
const mySession = await ort.InferenceSession.create('./my_model.onnx', {
    executionProviders: ['webgpu'],
    preferredOutputLocation: {
         "output_image:0": "gpu-buffer"
    }
});
```

now you don't need to prepare the `fetches` object, and onnxruntime-web
will prepare the output data at the location that is specified.

#### read data

when you get the output tensor, you can:
```js
// get the gpu buffer object:
const gpuBuffer = myOutputTensor.gpuBuffer; // GPUBuffer

// get the CPU data asynchronously
const cpuData = await myOutputTensor.getData();

// get the CPU data asynchronously and release the underlying GPU resources
const cpuData = await myOutputTensor.getData(true);

// dispose the tensor (release the underlying GPU resources). This tensor object will be invalid after dispose() is called.
myOutputTensor.dispose();
```

#### resource management

JavaScript has GC so you don't need to worry about managing JavaScript
objects. But there are 2 types of resources that are not managed by GC:
- GPU buffers that are used in tensors
- Underlying ORT native resources

To simplify, most of the unmanaged resources are handled inside ORT web.
But there are a few resources that need users to manage:
- All external GPU resources, including GPU buffers inside all tensors
created by `Tensor.fromGpuBuffer()`, will not be managed by ORT. User
should manage those GPU buffers themselves.
- When a session is created with `preferredOutputLocation` ==
"gpu-buffer" specified in session options, and the corresponding output
is not pre-allocated, the user needs to call the output tensor's `dispose()`
or `getData(true)` to manually release the underlying GPU buffers.
- ORT internal errors (including providing a pre-allocated output tensor
with wrong type/dims) will invalidate the whole wasm memory and are not
recoverable. An exception is thrown in this situation.
2023-09-29 11:24:42 -07:00

198 lines
5.1 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import {Tensor} from 'onnxruntime-common';
// This file includes common definitions. They do NOT have dependency on the WebAssembly instance.
/**
 * Numeric identifiers for tensor element data types.
 *
 * Copied from ONNX definition. Use this to drop dependency 'onnx_proto' to decrease compiled .js file size.
 *
 * The trailing comment on each member gives the tensor type string that the conversion helpers in this
 * file map it to/from. Members marked "no string mapping" are rejected by those helpers with an
 * "unsupported data type" error.
 */
export const enum DataType {
undefined = 0,  // no string mapping in this file
float = 1,  // 'float32'
uint8 = 2,  // 'uint8'
int8 = 3,  // 'int8'
uint16 = 4,  // 'uint16'
int16 = 5,  // 'int16'
int32 = 6,  // 'int32'
int64 = 7,  // 'int64'
string = 8,  // 'string' (getTensorElementSize yields undefined for this value)
bool = 9,  // 'bool'
float16 = 10,  // 'float16'
double = 11,  // 'float64'
uint32 = 12,  // 'uint32'
uint64 = 13,  // 'uint64'
complex64 = 14,  // no string mapping in this file
complex128 = 15,  // no string mapping in this file
bfloat16 = 16  // no string mapping in this file
}
/**
 * Lookup table from tensor data type name to its `DataType` enum value.
 */
const DATA_TYPE_ENUM_BY_NAME: ReadonlyMap<string, DataType> = new Map<string, DataType>([
  ['int8', DataType.int8],
  ['uint8', DataType.uint8],
  ['bool', DataType.bool],
  ['int16', DataType.int16],
  ['uint16', DataType.uint16],
  ['int32', DataType.int32],
  ['uint32', DataType.uint32],
  ['float16', DataType.float16],
  ['float32', DataType.float],
  ['float64', DataType.double],
  ['string', DataType.string],
  ['int64', DataType.int64],
  ['uint64', DataType.uint64],
]);

/**
 * Map a tensor data type name to its enum value.
 *
 * @param type - tensor data type name, e.g. 'float32'
 * @returns the `DataType` value registered for that name
 * @throws Error when the name has no registered enum value
 */
export const tensorDataTypeStringToEnum = (type: string): DataType => {
  const enumValue = DATA_TYPE_ENUM_BY_NAME.get(type);
  if (enumValue === undefined) {
    throw new Error(`unsupported data type: ${type}`);
  }
  return enumValue;
};
/**
 * Lookup table from `DataType` enum value to the tensor data type name.
 */
const DATA_TYPE_NAME_BY_ENUM: ReadonlyMap<DataType, Tensor.Type> = new Map<DataType, Tensor.Type>([
  [DataType.int8, 'int8'],
  [DataType.uint8, 'uint8'],
  [DataType.bool, 'bool'],
  [DataType.int16, 'int16'],
  [DataType.uint16, 'uint16'],
  [DataType.int32, 'int32'],
  [DataType.uint32, 'uint32'],
  [DataType.float16, 'float16'],
  [DataType.float, 'float32'],
  [DataType.double, 'float64'],
  [DataType.string, 'string'],
  [DataType.int64, 'int64'],
  [DataType.uint64, 'uint64'],
]);

/**
 * Map an enum value to its tensor data type name.
 *
 * @param typeProto - `DataType` enum value
 * @returns the tensor type name registered for that value, e.g. 'float32' for `DataType.float`
 * @throws Error when the value has no registered tensor type name
 */
export const tensorDataTypeEnumToString = (typeProto: DataType): Tensor.Type => {
  const typeName = DATA_TYPE_NAME_BY_ENUM.get(typeProto);
  if (typeName === undefined) {
    throw new Error(`unsupported data type: ${typeProto}`);
  }
  return typeName;
};
/**
 * Get the size in bytes of a single tensor element of the given data type.
 *
 * @param dateType - numeric data type identifier (a `DataType` enum value)
 * @returns size in integer or undefined if the data type is not supported
 */
export const getTensorElementSize = (dateType: number): number|undefined => {
  switch (dateType) {
    case 2:  // uint8
    case 3:  // int8
    case 9:  // bool
      return 1;
    case 4:   // uint16
    case 5:   // int16
    case 10:  // float16
      return 2;
    case 1:   // float
    case 6:   // int32
    case 12:  // uint32
      return 4;
    case 7:   // int64
    case 11:  // double
    case 13:  // uint64
      return 8;
    default:
      // Every other value (including 0, 8, 14, 15, 16 and anything out of range) has no size.
      return undefined;
  }
};
/**
 * Pick the typed array constructor used to hold the data of a tensor of the given type.
 *
 * Several tensor types share a constructor: 'float16' and 'uint16' both use `Uint16Array`, and
 * 'bool' and 'uint8' both use `Uint8Array`.
 *
 * @param type - tensor type name
 * @returns the typed array constructor for that tensor type
 * @throws Error when no typed array constructor is available for the type (e.g. 'string')
 */
export const tensorTypeToTypedArrayConstructor = (type: Tensor.Type): Float32ArrayConstructor|Uint8ArrayConstructor|
    Int8ArrayConstructor|Uint16ArrayConstructor|Int16ArrayConstructor|Int32ArrayConstructor|BigInt64ArrayConstructor|
    Uint8ArrayConstructor|Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor => {
  if (type === 'float16' || type === 'uint16') {
    return Uint16Array;
  }
  if (type === 'uint8' || type === 'bool') {
    return Uint8Array;
  }
  if (type === 'float32') {
    return Float32Array;
  }
  if (type === 'float64') {
    return Float64Array;
  }
  if (type === 'int8') {
    return Int8Array;
  }
  if (type === 'int16') {
    return Int16Array;
  }
  if (type === 'int32') {
    return Int32Array;
  }
  if (type === 'uint32') {
    return Uint32Array;
  }
  if (type === 'int64') {
    return BigInt64Array;
  }
  if (type === 'uint64') {
    return BigUint64Array;
  }
  throw new Error(`unsupported type: ${type}`);
};
/**
 * Map a log level name to its integer value.
 *
 * @param logLevel - one of 'verbose', 'info', 'warning', 'error' or 'fatal'
 * @returns 0 for 'verbose' up to 4 for 'fatal'
 * @throws Error when the level is omitted or is not one of the names above
 */
export const logLevelStringToEnum = (logLevel?: 'verbose'|'info'|'warning'|'error'|'fatal'): number => {
  // Names are ordered so that each name's position equals its integer level.
  const orderedLevels: readonly string[] = ['verbose', 'info', 'warning', 'error', 'fatal'];
  const levelValue = logLevel === undefined ? -1 : orderedLevels.indexOf(logLevel);
  if (levelValue < 0) {
    throw new Error(`unsupported logging level: ${logLevel}`);
  }
  return levelValue;
};
/**
 * Type guard telling whether a tensor type can be backed by a GPU buffer.
 *
 * @param type - tensor type name
 * @returns true for 'float32', 'int32', 'int64', 'bool', 'float16' and 'uint32'; false otherwise
 */
export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuBufferDataTypes => {
  switch (type) {
    case 'float32':
    case 'int32':
    case 'int64':
    case 'bool':
    case 'float16':
    case 'uint32':
      return true;
    default:
      return false;
  }
};
/**
 * Map a data location name to its integer value.
 *
 * @param location - one of 'none', 'cpu', 'cpu-pinned', 'texture' or 'gpu-buffer'
 * @returns 0 for 'none', 1 for 'cpu', 2 for 'cpu-pinned', 3 for 'texture', 4 for 'gpu-buffer'
 * @throws Error when the location is not one of the names above
 */
export const dataLocationStringToEnum = (location: Tensor.DataLocation): number => {
  // Names are ordered so that each name's position equals its integer value.
  const orderedLocations: readonly string[] = ['none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer'];
  const locationValue = orderedLocations.indexOf(location);
  if (locationValue === -1) {
    throw new Error(`unsupported data location: ${location}`);
  }
  return locationValue;
};
/**
 * Map an integer data location value to its name.
 *
 * @param location - integer data location value
 * @returns 'none', 'cpu', 'cpu-pinned', 'texture' or 'gpu-buffer' for 0 through 4 respectively;
 *     undefined for any other value
 */
export const dataLocationEnumToString = (location: number): Tensor.DataLocation|undefined => {
  switch (location) {
    case 0:
      return 'none';
    case 1:
      return 'cpu';
    case 2:
      return 'cpu-pinned';
    case 3:
      return 'texture';
    case 4:
      return 'gpu-buffer';
    default:
      return undefined;
  }
};