onnxruntime/js/web/lib/wasm/jsep/init.ts
Yulong Wang 79e50aeef3
[js/web] rewrite backend resolve to allow multiple EPs (#19735)
### Description

This PR rewrites the backend resolve logic to support specifying multiple
EPs.

#### Backend

The first version of ONNX Runtime Web carried over some existing
code from [ONNX.js](https://github.com/microsoft/onnxjs), including
the "backend" concept. The original "backend" in ONNX.js was designed on
the assumption that only one backend from the user's backend hint list
will be used. For example, in ONNX.js, if the user specifies the backend hints
`['webgl', 'wasm']`, ONNX.js first tries to use the WebGL backend - if it
loads successfully (the browser supports WebGL), then the "webgl" backend
is used and "wasm" is ignored; otherwise, "webgl" is
ignored and ONNX.js tries to load the "wasm" backend.

In short: only one backend will be used when initializing a session.

#### Execution Provider

Execution Provider, or EP, in ONNX Runtime is a different concept. One
of the differences is that users are allowed to specify multiple EPs, and
if one does not support a particular kernel, execution can fall back to another
EP. This is a very common case when using a GPU EP in ONNX Runtime.

#### Current Status: Backend v.s. EP

For the historical reasons mentioned above, the current status is
quite confusing. There are **real backend**s, which are different
implementations in code; there are **backend hint**s, which are the
string names used to request a backend; and there are **EP**s, which are
the ONNX Runtime concept.

Currently there are only 2 **backend**s in our code base: the "onnxjs
backend" and the "wasm backend". The "onnxjs backend" currently only
powers the backend hint "webgl", which goes into the old onnx.js code path.
All other backend hints, including "wasm", "cpu" (alias of wasm), "webgpu"
and "webnn", are powered by the "wasm backend".

Because ORT Web treats "backend" as an internal concept and wants to
align with ONNX Runtime, the names of the backend hints are becoming EP
names.

The following table shows today's status:

| Execution Provider Name (public) / Backend Hint (internal) | Backend | EP in ORT |
| -------- | ------- | ------- |
| "wasm"/"cpu" | WasmBackend | CPU EP |
| "webgl" | OnnxjsBackend | \* technically not an EP |
| "webgpu" | WasmBackend | JSEP |
| "webnn" | WasmBackend | WebNN EP |

#### Problem

While the API allows specifying multiple EPs, the backend resolving only
allows one backend. This causes issues when the user specifies multiple EP
names in session options: the backend resolve behavior and the EP
registration behavior are inconsistent. Specifically, in this issue:
https://github.com/microsoft/onnxruntime/issues/15796#issuecomment-1925363908:

EP list `['webgpu', 'wasm']` on a browser without WebGPU support
resolves to 'wasm' backend, but the full EP list is passed in session
options, so JSEP is still enabled, causing the runtime error.


#### Solution

Since we still need WebGL backend, we cannot totally remove the backend
register/resolve system. In this PR I made the following changes:
- initialize every backend from the EP list, instead of doing that only for
the first successful one.
- for the first resolved backend, filter the EP list down to the EPs using the
exact same backend. Remove all EPs not using this backend from session options.
- for every explicitly specified EP, if it is removed, show a warning
message in the console.
2024-03-15 11:47:45 -07:00

230 lines
8.8 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import {Env} from 'onnxruntime-common';
import {OrtWasmModule} from '../binding/ort-wasm';
import {DataType, getTensorElementSize} from '../wasm-common';
import {WebGpuBackend} from './backend-webgpu';
import {LOG_DEBUG} from './log';
import {TensorView} from './tensor-view';
import {ShapeUtil} from './util';
import {AdapterInfo, ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo} from './webgpu/types';
/* eslint-disable no-bitwise */
/**
 * TensorView implementation that exposes a region of the WebAssembly module's heap.
 *
 * `data` is passed to the typed-array constructors as the byte offset into the module's heap buffer, and `dims` gives
 * the logical shape used to compute the element count.
 */
class TensorViewImpl implements TensorView {
  constructor(
      private module: OrtWasmModule, public readonly dataType: number, public readonly data: number,
      public readonly dims: readonly number[]) {}

  /**
   * View the tensor as a Float32Array over the module heap.
   *
   * @returns an empty array when the tensor has no elements, otherwise a view starting at `data`.
   * @throws Error if the tensor's data type is not `DataType.float`.
   */
  getFloat32Array(): Float32Array {
    this.requireDataType(DataType.float);
    const length = ShapeUtil.size(this.dims);
    if (length === 0) {
      return new Float32Array();
    }
    return new Float32Array(this.module.HEAP8.buffer, this.data, length);
  }

  /**
   * View the tensor as a BigInt64Array over the module heap.
   *
   * @returns an empty array when the tensor has no elements, otherwise a view starting at `data`.
   * @throws Error if the tensor's data type is not `DataType.int64`.
   */
  getBigInt64Array(): BigInt64Array {
    this.requireDataType(DataType.int64);
    const length = ShapeUtil.size(this.dims);
    if (length === 0) {
      return new BigInt64Array();
    }
    return new BigInt64Array(this.module.HEAP8.buffer, this.data, length);
  }

  /**
   * View the tensor as an Int32Array over the module heap.
   *
   * @returns an empty array when the tensor has no elements, otherwise a view starting at `data`.
   * @throws Error if the tensor's data type is not `DataType.int32`.
   */
  getInt32Array(): Int32Array {
    this.requireDataType(DataType.int32);
    const length = ShapeUtil.size(this.dims);
    if (length === 0) {
      return new Int32Array();
    }
    return new Int32Array(this.module.HEAP8.buffer, this.data, length);
  }

  /**
   * Create a new view over the same data with a different shape.
   *
   * @param newDims - the new shape; it must describe the same number of elements as the current shape.
   * @throws Error if the element counts differ.
   */
  reshape(newDims: readonly number[]): TensorView {
    const requestedSize = ShapeUtil.size(newDims);
    const currentSize = ShapeUtil.size(this.dims);
    if (requestedSize !== currentSize) {
      throw new Error('Invalid new shape');
    }
    return new TensorViewImpl(this.module, this.dataType, this.data, newDims);
  }

  /** Throw unless this tensor's data type equals `expected`. */
  private requireDataType(expected: number): void {
    if (this.dataType !== expected) {
      throw new Error('Invalid data type');
    }
  }
}
/**
 * ComputeContext implementation that is populated from a context data block in the WebAssembly heap.
 *
 * The constructor reads consecutive 32-bit values starting at `contextDataOffset`:
 * the kernel context handle, the input count, the output count, the custom data offset and the custom data size,
 * followed by one record per input (data type, data, rank, then `rank` dimension values).
 */
class ComputeContextImpl implements ComputeContext {
  readonly adapterInfo: AdapterInfo;
  readonly opKernelContext: number;
  readonly inputs: readonly TensorView[];
  readonly outputCount: number;

  /** Custom data of the kernel currently tracked by the backend. */
  get kernelCustomData(): {[key: string]: unknown} {
    return this.backend.currentKernelCustomData;
  }

  /** A view over the module heap covering the custom data region described in the context data. */
  get customDataBuffer(): Uint8Array {
    return this.module.HEAPU8.subarray(this.customDataOffset, this.customDataOffset + this.customDataSize);
  }

  private customDataOffset = 0;
  private customDataSize = 0;

  /**
   * @param module - the ORT WebAssembly module whose heap holds the context data
   * @param backend - the WebGPU backend used to run programs and manage GPU data
   * @param contextDataOffset - byte offset of the context data in the module heap
   */
  constructor(private module: OrtWasmModule, private backend: WebGpuBackend, contextDataOffset: number) {
    this.adapterInfo = backend.adapterInfo;
    const heapU32 = module.HEAPU32;

    // extract context data
    // Unsigned shift converts the byte offset to a HEAPU32 index.
    let dataIndex = (contextDataOffset >>> 2);
    this.opKernelContext = heapU32[dataIndex++];
    const inputCount = heapU32[dataIndex++];
    this.outputCount = heapU32[dataIndex++];
    this.customDataOffset = heapU32[dataIndex++];
    this.customDataSize = heapU32[dataIndex++];

    const inputs: TensorView[] = [];
    for (let i = 0; i < inputCount; i++) {
      const dataType = heapU32[dataIndex++];
      const data = heapU32[dataIndex++];
      const dim = heapU32[dataIndex++];
      const dims: number[] = [];
      for (let d = 0; d < dim; d++) {
        dims.push(heapU32[dataIndex++]);
      }
      inputs.push(new TensorViewImpl(module, dataType, data, dims));
    }
    this.inputs = inputs;
  }

  /**
   * Run a program through the backend.
   *
   * @param program - the program to run
   * @param inputsOutputsMapping - optional mapping; numeric entries in `inputs` select from this context's inputs,
   *     other entries are passed through unchanged. `outputs` defaults to an empty list.
   * @returns whatever the backend's `run()` returns for this program
   */
  compute(program: ProgramInfo, inputsOutputsMapping?: ComputeContextInputsOutputsMapping): TensorView[] {
    // prepare inputs. inputs should always be valid data.
    const mappedInputs =
        inputsOutputsMapping?.inputs?.map(i => typeof i === 'number' ? this.inputs[i] : i) ?? this.inputs;
    // prepare outputs.
    const outputIndices = inputsOutputsMapping?.outputs ?? [];
    const createKernelOutput = (index: number, dataType: number, dims: readonly number[]): TensorView =>
        new TensorViewImpl(this.module, dataType, this.output(index, dims), dims);
    const createTemporaryOutput = (dataType: number, dims: readonly number[]): TensorView => {
      const elementSize = getTensorElementSize(dataType);
      if (!elementSize) {
        throw new Error(`Unsupported data type: ${dataType}`);
      }
      const bufferSize = elementSize * ShapeUtil.size(dims);
      // An empty tensor does not allocate GPU data; 0 is used as its data ID.
      const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;
      return new TensorViewImpl(this.module, dataType, gpuDataId, dims);
    };
    return this.backend.run(program, mappedInputs, outputIndices, createKernelOutput, createTemporaryOutput);
  }

  /**
   * Request a kernel output with the given shape via `_JsepOutput`.
   *
   * The shape is written to temporary stack memory as the rank followed by each dimension; the stack is restored
   * afterwards whether or not the call succeeds.
   *
   * @param index - the output index
   * @param dims - the output shape
   * @returns the value returned by `_JsepOutput`
   * @throws Error wrapping any failure raised while preparing or requesting the output
   */
  output(index: number, dims: readonly number[]): number {
    const stack = this.module.stackSave();
    try {
      const data = this.module.stackAlloc((1 + dims.length) * 4 /* sizeof(size_t) */);
      // Use the unsigned shift (as the constructor does) so that a pointer delivered as a negative int32 still
      // produces a valid, non-negative HEAPU32 index.
      let offset = data >>> 2;
      this.module.HEAPU32[offset++] = dims.length;
      for (let i = 0; i < dims.length; i++) {
        this.module.HEAPU32[offset++] = dims[i];
      }
      return this.module._JsepOutput!(this.opKernelContext, index, data);
    } catch (e) {
      throw new Error(
          `Failed to generate kernel's output[${index}] with dims [${dims}]. ` +
          'If you are running with pre-allocated output, please make sure the output type/dims are correct. ' +
          `Error: ${e}`);
    } finally {
      this.module.stackRestore(stack);
    }
  }
}
/**
 * Initialize JSEP for the given execution provider.
 *
 * This function is called after the WebAssembly module is loaded and initialized ("_OrtInit" is called), once for
 * each of the following EPs if they are specified:
 * - "webgpu"
 * - "webnn"
 *
 * For WebGPU, this function expects:
 * - WebGPU is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
 * - WebGPU is available in current environment. (a valid GPUAdapter is passed in)
 *
 * For WebNN, this function expects:
 * - WebNN is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
 * - WebNN is available in current environment. (navigator.ml is not undefined)
 *
 * For "webgpu", a WebGpuBackend is created and initialized, and then registered with the module together with the
 * callbacks for allocation, free, copy, kernel management, run and graph capture. For "webnn", only the EP name is
 * registered.
 *
 * If the WebAssembly module is not built with JSEP support, this function throws an error. This will invalidate
 * 'webgpu'/'webnn' backend.
 *
 * @param name - the name of the EP, either "webgpu" or "webnn"
 * @param module - the ORT WebAssembly module
 * @param env - the ORT environment variable (ort.env)
 * @param gpuAdapter - the pre-created GPU adapter
 */
export const init =
    async(name: 'webgpu'|'webnn', module: OrtWasmModule, env: Env, gpuAdapter?: GPUAdapter): Promise<void> => {
  const jsepInit = module.jsepInit;
  if (!jsepInit) {
    throw new Error('Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.');
  }

  // WebNN needs no JavaScript-side callbacks.
  if (name !== 'webgpu') {
    jsepInit('webnn');
    return;
  }

  const backend = new WebGpuBackend();
  await backend.initialize(env, gpuAdapter!);

  // jsepAlloc()
  const jsepAlloc = (size: number) => backend.alloc(size);

  // jsepFree()
  const jsepFree = (ptr: number) => backend.free(ptr);

  // jsepCopy(src, dst, size, isSourceGpu)
  const jsepCopy = (src: number, dst: number, size: number, isSourceGpu = false) => {
    if (!isSourceGpu) {
      LOG_DEBUG('verbose', () => `[WebGPU] jsepCopyCpuToGpu: dataOffset=${src}, gpuDataId=${dst}, size=${size}`);
      // `>>> 0` reinterprets the heap offset as an unsigned 32-bit value.
      const start = src >>> 0;
      const data = module.HEAPU8.subarray(start, start + size);
      backend.upload(dst, data);
      return;
    }
    LOG_DEBUG('verbose', () => `[WebGPU] jsepCopyGpuToGpu: src=${src}, dst=${dst}, size=${size}`);
    backend.memcpy(src, dst);
  };

  // jsepCopyAsync(src, dst, size)
  const jsepCopyAsync = async(gpuDataId: number, dataOffset: number, size: number): Promise<void> => {
    LOG_DEBUG(
        'verbose', () => `[WebGPU] jsepCopyGpuToCpu: gpuDataId=${gpuDataId}, dataOffset=${dataOffset}, size=${size}`);
    // The destination view is produced by a callback handed to the backend rather than computed here.
    const getTargetBuffer = () => {
      const start = dataOffset >>> 0;
      return module.HEAPU8.subarray(start, start + size);
    };
    await backend.download(gpuDataId, getTargetBuffer);
  };

  // jsepCreateKernel
  const jsepCreateKernel = (kernelType: string, kernelId: number, attribute: unknown) =>
      backend.createKernel(kernelType, kernelId, attribute, module.UTF8ToString(module._JsepGetNodeName!(kernelId)));

  // jsepReleaseKernel
  const jsepReleaseKernel = (kernel: number) => backend.releaseKernel(kernel);

  // jsepRun
  const jsepRun =
      (kernel: number, contextDataOffset: number, sessionHandle: number, errors: Array<Promise<string|null>>) => {
        LOG_DEBUG(
            'verbose',
            () => `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${
                contextDataOffset}`);
        const context = new ComputeContextImpl(module, backend, contextDataOffset);
        return backend.computeKernel(kernel, context, errors);
      };

  // jsepCaptureBegin
  const jsepCaptureBegin = () => backend.captureBegin();

  // jsepCaptureEnd
  const jsepCaptureEnd = () => backend.captureEnd();

  // jsepReplay
  const jsepReplay = () => backend.replay();

  // The callbacks are positional: their order in this list must not change.
  jsepInit('webgpu', [
    backend,
    jsepAlloc,
    jsepFree,
    jsepCopy,
    jsepCopyAsync,
    jsepCreateKernel,
    jsepReleaseKernel,
    jsepRun,
    jsepCaptureBegin,
    jsepCaptureEnd,
    jsepReplay,
  ]);
};