mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
<del> **This PR is based on a few prerequisites PRs. They are listed as below:** - #17465 - #17469 - #17470 - #17472 - #17473 - #17484 Please review the current change by only looking at commit e2e6623e673ec6de55a5c1f8edcbd3a46b535a89 and later. </del> ### Description This PR introduces WebGPU IO binding. This new feature allows onnxruntime-web users to use tensors created from GPU as model input/output so that a model inferencing can be done without unnecessary data copy between CPU and GPU for model input/output. ### Examples An E2E demo/example is being worked on. Following is some simple demo with code snippet. Let's first check today how we do: ```js // STEP.1 - create an inference session: const mySession = await ort.InferenceSession.create('./my_model.onnx', { executionProviders: ['webgpu'] }); // STEP.2 - create model input: (supposing myImageCpuData is a Float32Array) const feeds = { 'input_image:0': new ort.Tensor('float32', myImageCpuData, [1, 224, 224, 3]) }; // STEP.3 - run model const myResults = await mySession.run(feeds); // STEP.4 - get output data const myData = myResults['output_image:0'].data; // Float32Array ``` #### for inputs (GPU tensor): Now, with IO binding, you can create a tensor from a GPU buffer, and feed it to the model: ```js // new STEP.2.A - create model input from a GPU buffer: (supposing myInputGpuBuffer is a `GPUBuffer` object with input data) const feeds = { 'input_image:0': ort.Tensor.fromGpuBuffer(myInputGpuBuffer, { dataType: 'float32', dims: [1, 224, 224, 3] }) }; ``` ### for outputs (pre-allocated GPU tensor) you can also do that for output, **if you know the output shape**: ```js // new STEP.2.B - create model output from a GPU buffer: (supposing myOutputGpuBuffer is a pre-allocated `GPUBuffer` object) const fetches = { 'output_image:0': ort.Tensor.fromGpuBuffer(myOutputGpuBuffer, { dataType: 'float32', dims: [1, 512, 512, 3] }) }; // new STEP.3 - run model with pre-allocated output (fetches) const myResults = await 
mySession.run(feeds, fetches); ``` ### for outputs (specify location) if you do not know the output shape, you can specify the output location when creating the session: ```js // new STEP.1 - create an inference session with an option "preferredOutputLocation": const mySession = await ort.InferenceSession.create('./my_model.onnx', { executionProviders: ['webgpu'], preferredOutputLocation: "gpu-buffer" }); ``` if the model has multiple outputs, you can specify them separately: ```js // new STEP.1 - create an inference session with an option "preferredOutputLocation": const mySession = await ort.InferenceSession.create('./my_model.onnx', { executionProviders: ['webgpu'], preferredOutputLocation: { "output_image:0": "gpu-buffer" } }); ``` now you don't need to prepare the `fetches` object and onnxruntime-web will prepare output data at the specified location. #### read data when you get the output tensor, you can: ```js // get the gpu buffer object: const gpuBuffer = myOutputTensor.gpuBuffer; // GPUBuffer // get the CPU data asynchronously const cpuData = await myOutputTensor.getData(); // get the CPU data asynchronously and release the underlying GPU resources const cpuData = await myOutputTensor.getData(true); // dispose the tensor (release the underlying GPU resources). This tensor object will be invalid after dispose() is called. myOutputTensor.dispose(); ``` #### resource management JavaScript has GC so you don't need to worry about managing JavaScript objects. But there are 2 types of resources that are not managed by GC: - GPU buffers that are used in tensors - Underlying ORT native resources To simplify, most of the unmanaged resources are handled inside ORT web. But there are a few resources that need users to manage: - All external GPU resources, including GPU buffers inside all tensors created by `Tensor.fromGpuBuffer()`, will not be managed by ORT. Users should manage those GPU buffers themselves. 
- When a session is created with `preferredOutputLocation` == "gpu-buffer" specified in session options, and the corresponding output is not pre-allocated, the user needs to call the output tensor's `dispose()` or `getData(true)` to manually release the underlying GPU buffers. - ORT internal errors (including providing a pre-allocated output tensor with wrong type/dims) will invalidate the whole wasm memory and are not recoverable. An exception is thrown in this situation.
198 lines
5.1 KiB
TypeScript
198 lines
5.1 KiB
TypeScript
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
import {Tensor} from 'onnxruntime-common';
|
|
|
|
// This file includes common definitions. They do NOT have dependency on the WebAssembly instance.
|
|
|
|
/**
 * Numeric identifiers of tensor element data types.
 *
 * Copied from ONNX definition. Use this to drop dependency 'onnx_proto' to decrease compiled .js file size.
 *
 * The numeric values are part of the contract: other helpers in this file index lookup tables by these values, so
 * members must keep their explicit values and must not be reordered or renumbered.
 */
export const enum DataType {
  undefined = 0,
  float = 1,
  uint8 = 2,
  int8 = 3,
  uint16 = 4,
  int16 = 5,
  int32 = 6,
  int64 = 7,
  string = 8,
  bool = 9,
  float16 = 10,
  double = 11,
  uint32 = 12,
  uint64 = 13,
  complex64 = 14,
  complex128 = 15,
  bfloat16 = 16
}
|
|
|
|
/**
 * Map string tensor data to enum value
 *
 * @param type - tensor data type name, e.g. 'float32', 'int64' or 'bool'.
 * @returns the corresponding `DataType` enum value. Note that 'float32' maps to `DataType.float` and 'float64' maps
 *     to `DataType.double`.
 * @throws Error if `type` is not one of the names handled below.
 */
export const tensorDataTypeStringToEnum = (type: string): DataType => {
  switch (type) {
    case 'int8':
      return DataType.int8;
    case 'uint8':
      return DataType.uint8;
    case 'bool':
      return DataType.bool;
    case 'int16':
      return DataType.int16;
    case 'uint16':
      return DataType.uint16;
    case 'int32':
      return DataType.int32;
    case 'uint32':
      return DataType.uint32;
    case 'float16':
      return DataType.float16;
    case 'float32':
      return DataType.float;
    case 'float64':
      return DataType.double;
    case 'string':
      return DataType.string;
    case 'int64':
      return DataType.int64;
    case 'uint64':
      return DataType.uint64;

    default:
      throw new Error(`unsupported data type: ${type}`);
  }
};
|
|
|
|
/**
 * Map enum value to string tensor data
 *
 * @param typeProto - `DataType` enum value to convert.
 * @returns the tensor data type name. Note that `DataType.float` maps to 'float32' and `DataType.double` maps to
 *     'float64'.
 * @throws Error if `typeProto` is not one of the values handled below (`undefined`, `complex64`, `complex128` and
 *     `bfloat16` have no string mapping here).
 */
export const tensorDataTypeEnumToString = (typeProto: DataType): Tensor.Type => {
  switch (typeProto) {
    case DataType.int8:
      return 'int8';
    case DataType.uint8:
      return 'uint8';
    case DataType.bool:
      return 'bool';
    case DataType.int16:
      return 'int16';
    case DataType.uint16:
      return 'uint16';
    case DataType.int32:
      return 'int32';
    case DataType.uint32:
      return 'uint32';
    case DataType.float16:
      return 'float16';
    case DataType.float:
      return 'float32';
    case DataType.double:
      return 'float64';
    case DataType.string:
      return 'string';
    case DataType.int64:
      return 'int64';
    case DataType.uint64:
      return 'uint64';

    default:
      throw new Error(`unsupported data type: ${typeProto}`);
  }
};
|
|
|
|
/**
 * get tensor element size in bytes by the given data type
 *
 * The lookup table is indexed by the numeric `DataType` value, so its order must stay in sync with that enum.
 *
 * @param dataType - numeric data type value (a `DataType` enum value).
 * @returns size in integer or undefined if the data type is not supported (this includes any value that falls
 *     outside the table).
 */
export const getTensorElementSize = (dataType: number): number|undefined => [
  undefined,  // 0: undefined
  4,          // 1: float
  1,          // 2: uint8
  1,          // 3: int8
  2,          // 4: uint16
  2,          // 5: int16
  4,          // 6: int32
  8,          // 7: int64
  undefined,  // 8: string
  1,          // 9: bool
  2,          // 10: float16
  8,          // 11: double
  4,          // 12: uint32
  8,          // 13: uint64
  undefined,  // 14: complex64
  undefined,  // 15: complex128
  undefined,  // 16: bfloat16
][dataType];
|
|
|
|
/**
 * get typed array constructor by the given tensor type
 *
 * @param type - tensor data type name.
 * @returns the typed array constructor used to hold elements of that type. 'float16' shares `Uint16Array` with
 *     'uint16', and 'bool' shares `Uint8Array` with 'uint8'.
 * @throws Error if no typed array is associated with `type` (e.g. 'string').
 */
export const tensorTypeToTypedArrayConstructor = (type: Tensor.Type): Float32ArrayConstructor|Uint8ArrayConstructor|
    Int8ArrayConstructor|Uint16ArrayConstructor|Int16ArrayConstructor|Int32ArrayConstructor|BigInt64ArrayConstructor|
    Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor => {
      switch (type) {
        case 'float16':
          // presumably the raw 16-bit patterns are stored as-is; no conversion happens in this function
          return Uint16Array;
        case 'float32':
          return Float32Array;
        case 'uint8':
          return Uint8Array;
        case 'int8':
          return Int8Array;
        case 'uint16':
          return Uint16Array;
        case 'int16':
          return Int16Array;
        case 'int32':
          return Int32Array;
        case 'bool':
          return Uint8Array;
        case 'float64':
          return Float64Array;
        case 'uint32':
          return Uint32Array;
        case 'int64':
          return BigInt64Array;
        case 'uint64':
          return BigUint64Array;
        default:
          throw new Error(`unsupported type: ${type}`);
      }
    };
|
|
|
|
/**
 * Map string log level to integer value
 *
 * @param logLevel - log level name; levels are numbered from 'verbose' (0) up to 'fatal' (4).
 * @returns the integer value of the log level.
 * @throws Error if `logLevel` is omitted or is not one of the names handled below. No default level is applied
 *     here, so callers must resolve a concrete level before calling.
 */
export const logLevelStringToEnum = (logLevel?: 'verbose'|'info'|'warning'|'error'|'fatal'): number => {
  switch (logLevel) {
    case 'verbose':
      return 0;
    case 'info':
      return 1;
    case 'warning':
      return 2;
    case 'error':
      return 3;
    case 'fatal':
      return 4;
    default:
      throw new Error(`unsupported logging level: ${logLevel}`);
  }
};
|
|
|
|
/**
 * Check whether the given tensor type is supported by GPU buffer
 *
 * Acts as a type guard: when it returns true, `type` is narrowed to `Tensor.GpuBufferDataTypes`.
 *
 * @param type - tensor data type name.
 * @returns true only for 'float32', 'int32', 'int64', 'bool', 'float16' and 'uint32'.
 */
export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuBufferDataTypes => type === 'float32' ||
    type === 'int32' || type === 'int64' || type === 'bool' || type === 'float16' || type === 'uint32';
|
|
|
|
/**
 * Map string data location to integer value
 *
 * @param location - data location name.
 * @returns the integer value of the location: 'none' = 0, 'cpu' = 1, 'cpu-pinned' = 2, 'texture' = 3,
 *     'gpu-buffer' = 4.
 * @throws Error if `location` is not one of the names handled below.
 */
export const dataLocationStringToEnum = (location: Tensor.DataLocation): number => {
  switch (location) {
    case 'none':
      return 0;
    case 'cpu':
      return 1;
    case 'cpu-pinned':
      return 2;
    case 'texture':
      return 3;
    case 'gpu-buffer':
      return 4;
    default:
      throw new Error(`unsupported data location: ${location}`);
  }
};
|
|
|
|
/**
 * Map integer data location to string value
 *
 * @param location - integer data location value.
 * @returns the data location name for values 0 through 4 ('none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer'),
 *     or undefined for any other value.
 */
export const dataLocationEnumToString = (location: number): Tensor.DataLocation|undefined =>
    (['none', 'cpu', 'cpu-pinned', 'texture', 'gpu-buffer'] as const)[location];
|