[js/web] rewrite backend resolve to allow multiple EPs (#19735)

### Description

This PR rewrites the backend resolve logic to support specifying multiple
EPs.

#### Backend

The first version of ONNX Runtime Web actually carried some existing
code from [ONNX.js](https://github.com/microsoft/onnxjs), which includes
the "backend" concept. The original "backend" in ONNX.js is designed on
the assumption that only one backend from the user's backend hint list
will be used. For example, in ONNX.js, if the user specifies a backend hint as
`['webgl', 'wasm']`, ONNX.js will first try to use the WebGL backend - if it
loads successfully (the browser supports webgl), then the "webgl" backend
will be used and "wasm" will be ignored; otherwise, "webgl" will be
ignored and ONNX.js will try to load the "wasm" backend.

In short: only one backend will be used when initializing a session.

#### Execution Provider

Execution Provider, or EP, in ONNX Runtime is a different concept. One
of the differences is that users are allowed to specify multiple EPs, and
if one does not support a particular kernel, it can fall back to another
EP. This is a very common case when using a GPU EP in ONNX Runtime.

#### Current Status: Backend v.s. EP

Because of the historical reasons mentioned above, the current status is
quite confusing. There are **real backend**s, which are different
implementations in code; there are **backend hint**s, which are the
string names used to request a backend; and there are **EP**s, which are
an ONNX Runtime concept.

Currently there are only 2 **backend**s in our code base: the "onnxjs
backend" and the "wasm backend". The "onnxjs backend" currently only
powers the backend hint "webgl", which goes into the old onnx.js code path.
All other backend hints, including "wasm", "cpu" (an alias of "wasm"), "webgpu"
and "webnn", are powered by the "wasm backend".

And because ORT Web treats "backend" as an internal concept and wants to
align with ONNX Runtime, the names of backend hints are becoming EP
names.

The following table shows today's status:

| Execution Provider Name (public) / Backend Hint (internal) | Backend | EP in ORT |
| -------- | ------- | ------- |
| "wasm"/"cpu" | WasmBackend | CPU EP |
| "webgl" | OnnxjsBackend | \* technically not an EP |
| "webgpu" | WasmBackend | JSEP |
| "webnn" | WasmBackend | WebNN EP |

#### Problem

While the API allows specifying multiple EPs, backend resolution only
allows one backend. This causes issues when the user specifies multiple EP
names in session options: the backend resolve behavior and the EP
registration behavior are inconsistent. Specifically, in this issue:
https://github.com/microsoft/onnxruntime/issues/15796#issuecomment-1925363908:

EP list `['webgpu', 'wasm']` on a browser without WebGPU support
resolves to 'wasm' backend, but the full EP list is passed in session
options, so JSEP is still enabled, causing the runtime error.


#### Solution

Since we still need WebGL backend, we cannot totally remove the backend
register/resolve system. In this PR I made the following changes:
- initialize every backend from the EP list, instead of only doing that for
the first successful one.
- for the first resolved backend, keep all EPs that use the exact same
backend, and remove all EPs not using this backend from session options.
- for every explicitly specified EP, if it is removed, show a warning
message in the console.
This commit is contained in:
Yulong Wang 2024-03-15 11:47:45 -07:00 committed by GitHub
parent 0b2a75b274
commit 79e50aeef3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 351 additions and 235 deletions

View file

@ -2,6 +2,7 @@
// Licensed under the MIT License.
import {Backend} from './backend.js';
import {InferenceSession} from './inference-session.js';
interface BackendInfo {
backend: Backend;
@ -10,6 +11,7 @@ interface BackendInfo {
initPromise?: Promise<void>;
initialized?: boolean;
aborted?: boolean;
error?: string;
}
const backends: Map<string, BackendInfo> = new Map();
@ -60,43 +62,100 @@ export const registerBackend = (name: string, backend: Backend, priority: number
};
/**
* Resolve backend by specified hints.
* Try to resolve and initialize a backend.
*
* @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
* @returns a promise that resolves to the backend.
* @param backendName - the name of the backend.
* @returns the backend instance if resolved and initialized successfully, or an error message if failed.
*/
const tryResolveAndInitializeBackend = async(backendName: string): Promise<Backend|string> => {
  // Failures are reported as a string result rather than thrown, so the caller can collect one message per
  // backend name and keep trying the remaining names.
  const backendInfo = backends.get(backendName);
  if (!backendInfo) {
    return 'backend not found.';
  }

  // The outcome of an earlier attempt is cached on the record: reuse it instead of calling init() again.
  if (backendInfo.initialized) {
    return backendInfo.backend;
  }
  if (backendInfo.aborted) {
    // `aborted` is only ever set together with `error` (see the catch block below).
    return backendInfo.error!;
  }

  // If `initPromise` is already set, another call started the initialization and this call only waits for it.
  const isInitializing = !!backendInfo.initPromise;
  try {
    if (!isInitializing) {
      backendInfo.initPromise = backendInfo.backend.init(backendName);
    }
    await backendInfo.initPromise;
    backendInfo.initialized = true;
    return backendInfo.backend;
  } catch (e) {
    // Every waiter observes the same rejection reason, so stringify it locally instead of reading
    // `backendInfo.error`. That field is written only by the call that started the initialization, and
    // reading it here would silently depend on that call's catch block having already run.
    const message = `${e}`;
    if (!isInitializing) {
      backendInfo.error = message;
      backendInfo.aborted = true;
    }
    return message;
  } finally {
    // The promise has settled either way; the initialized/aborted flags now carry the state.
    delete backendInfo.initPromise;
  }
};
/**
* Resolve execution providers from the specific session options.
*
* @param options - the session options object.
* @returns a promise that resolves to a tuple of an initialized backend instance and a session options object with
* filtered EP list.
*
* @ignore
*/
export const resolveBackend = async(backendHints: readonly string[]): Promise<Backend> => {
const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
const errors = [];
for (const backendName of backendNames) {
const backendInfo = backends.get(backendName);
if (backendInfo) {
if (backendInfo.initialized) {
return backendInfo.backend;
} else if (backendInfo.aborted) {
continue; // current backend is unavailable; try next
export const resolveBackendAndExecutionProviders = async(options: InferenceSession.SessionOptions):
Promise<[backend: Backend, options: InferenceSession.SessionOptions]> => {
// extract backend hints from session options
const eps = options.executionProviders || [];
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
// try to resolve and initialize all requested backends
let backend: Backend|undefined;
const errors = [];
const availableBackendNames = new Set<string>();
for (const backendName of backendNames) {
const resolveResult = await tryResolveAndInitializeBackend(backendName);
if (typeof resolveResult === 'string') {
errors.push({name: backendName, err: resolveResult});
} else {
if (!backend) {
backend = resolveResult;
}
if (backend === resolveResult) {
availableBackendNames.add(backendName);
}
}
}
const isInitializing = !!backendInfo.initPromise;
try {
if (!isInitializing) {
backendInfo.initPromise = backendInfo.backend.init(backendName);
}
await backendInfo.initPromise;
backendInfo.initialized = true;
return backendInfo.backend;
} catch (e) {
if (!isInitializing) {
errors.push({name: backendName, err: e});
}
backendInfo.aborted = true;
} finally {
delete backendInfo.initPromise;
// if no backend is available, throw error.
if (!backend) {
throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
}
}
}
throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
};
// for each explicitly requested backend, if it's not available, output warning message.
for (const {name, err} of errors) {
if (backendHints.includes(name)) {
// eslint-disable-next-line no-console
console.warn(`removing requested execution provider "${
name}" from session options because it is not available: ${err}`);
}
}
const filteredEps = eps.filter(i => availableBackendNames.has(typeof i === 'string' ? i : i.name));
return [
backend, new Proxy(options, {
get: (target, prop) => {
if (prop === 'executionProviders') {
return filteredEps;
}
return Reflect.get(target, prop);
}
})
];
};

View file

@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import {resolveBackend} from './backend-impl.js';
import {resolveBackendAndExecutionProviders} from './backend-impl.js';
import {InferenceSessionHandler} from './backend.js';
import {InferenceSession as InferenceSessionInterface} from './inference-session.js';
import {OnnxValue} from './onnx-value.js';
@ -195,11 +195,9 @@ export class InferenceSession implements InferenceSessionInterface {
throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
}
// get backend hints
const eps = options.executionProviders || [];
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
const backend = await resolveBackend(backendHints);
const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, options);
// resolve backend, update session options with validated EPs, and create session handler
const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, optionsWithValidatedEPs);
TRACE_FUNC_END();
return new InferenceSession(handler);
}

View file

@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import {resolveBackend} from './backend-impl.js';
import {resolveBackendAndExecutionProviders} from './backend-impl.js';
import {SessionHandler, TrainingSessionHandler} from './backend.js';
import {InferenceSession as InferenceSession} from './inference-session.js';
import {OnnxValue} from './onnx-value.js';
@ -55,13 +55,12 @@ export class TrainingSession implements TrainingSessionInterface {
const optimizerModel: string|Uint8Array = trainingOptions.optimizerModel || '';
const options: SessionOptions = sessionOptions || {};
// get backend hints
const eps = options.executionProviders || [];
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
const backend = await resolveBackend(backendHints);
// resolve backend, update session options with validated EPs, and create session handler
const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
if (backend.createTrainingSessionHandler) {
const handler = await backend.createTrainingSessionHandler(
trainingOptions.checkpointState, trainingOptions.trainModel, evalModel, optimizerModel, options);
trainingOptions.checkpointState, trainingOptions.trainModel, evalModel, optimizerModel,
optionsWithValidatedEPs);
return new TrainingSession(handler, !!trainingOptions.optimizerModel, !!trainingOptions.evalModel);
} else {
throw new Error(noBackendErrMsg);

View file

@ -16,20 +16,97 @@ export declare namespace JSEP {
type CaptureBeginFunction = () => void;
type CaptureEndFunction = () => void;
type ReplayFunction = () => void;
/**
* JSEP-related members of the WebAssembly module: the external data helpers, the JSEP entry point, and
* (through WebGpuModule) the WebGPU-specific members.
*/
export interface Module extends WebGpuModule {
/**
* Mount the external data file to an internal map, which will be used during session initialization.
*
* @param externalDataFilePath - specify the relative path of the external data file.
* @param externalDataFileData - specify the content data.
*/
mountExternalData(externalDataFilePath: string, externalDataFileData: Uint8Array): void;
/**
* Unmount all external data files from the internal map.
*/
unmountExternalData(): void;
/**
* This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime per
* backend. This function initializes Asyncify support. If name is 'webgpu', also initializes WebGPU backend and
* registers a few callbacks that will be called in C++ code.
*
* @param name - the EP being initialized; selects which overload applies.
* @param initParams - for 'webgpu', the backend object followed by the callbacks, in the fixed order of the
* tuple below.
*/
jsepInit(name: 'webgpu', initParams: [
backend: BackendType, alloc: AllocFunction, free: FreeFunction, upload: UploadFunction,
download: DownloadFunction, createKernel: CreateKernelFunction, releaseKernel: ReleaseKernelFunction,
run: RunFunction, captureBegin: CaptureBeginFunction, captureEnd: CaptureEndFunction, replay: ReplayFunction
]): void;
// The 'webnn' overload takes no init params (typed as `never` so that none can be passed).
jsepInit(name: 'webnn', initParams?: never): void;
}
/**
* WebGPU-specific members of the JSEP module. Each member's comment is tagged with where it comes from:
* "[exported from wasm]" or "[exported from js_internal_api.js]".
*/
export interface WebGpuModule {
/**
* [exported from wasm] Specify a kernel's output when running OpKernel::Compute().
*
* @param context - specify the kernel context pointer.
* @param index - specify the index of the output.
* @param data - specify the pointer to encoded data of type and dims.
*/
_JsepOutput(context: number, index: number, data: number): number;
/**
* [exported from wasm] Get name of an operator node.
*
* @param kernel - specify the kernel pointer.
* @returns the pointer to a C-style UTF8 encoded string representing the node name.
*/
_JsepGetNodeName(kernel: number): number;
/**
* [exported from js_internal_api.js] Register a user GPU buffer for usage of a session's input or output.
*
* @param sessionId - specify the session ID.
* @param index - specify an integer to represent which input/output it is registering for. For input, it is the
* input_index corresponding to the session's inputNames. For output, it is the inputCount + output_index
* corresponding to the session's outputNames.
* @param buffer - specify the GPU buffer to register.
* @param size - specify the original data size in byte.
* @returns the GPU data ID for the registered GPU buffer.
*/
jsepRegisterBuffer: (sessionId: number, index: number, buffer: GPUBuffer, size: number) => number;
/**
* [exported from js_internal_api.js] Get the GPU buffer by GPU data ID.
*
* @param dataId - specify the GPU data ID
* @returns the GPU buffer.
*/
jsepGetBuffer: (dataId: number) => GPUBuffer;
/**
* [exported from js_internal_api.js] Create a function to be used to create a GPU Tensor.
*
* @param gpuBuffer - specify the GPU buffer
* @param size - specify the original data size in byte.
* @param type - specify the tensor type.
* @returns the generated downloader function.
*/
jsepCreateDownloader:
(gpuBuffer: GPUBuffer, size: number,
type: Tensor.GpuBufferDataTypes) => () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
/**
* [exported from js_internal_api.js] Called when InferenceSession.run started. This function will be called before
* _OrtRun[WithBinding]() is called.
* @param sessionId - specify the session ID.
*/
jsepOnRunStart: (sessionId: number) => void;
/**
* [exported from js_internal_api.js] Release a session. This function will be called before _OrtReleaseSession() is
* called.
* @param sessionId - specify the session ID.
*/
jsepOnReleaseSession: (sessionId: number) => void;
}
}
export interface OrtWasmModule extends EmscriptenModule {
// #region emscripten functions
stackSave(): number;
stackRestore(stack: number): void;
stackAlloc(size: number): number;
UTF8ToString(offset: number, maxBytesToRead?: number): string;
lengthBytesUTF8(str: string): number;
stringToUTF8(str: string, offset: number, maxBytes: number): void;
// #endregion
// #region ORT APIs
export interface OrtInferenceAPIs {
_OrtInit(numThreads: number, loggingLevel: number): number;
_OrtGetLastError(errorCodeOffset: number, errorMessageOffset: number): void;
@ -74,126 +151,61 @@ export interface OrtWasmModule extends EmscriptenModule {
_OrtReleaseRunOptions(runOptionsHandle: number): void;
_OrtEndProfiling(sessionHandle: number): number;
// #endregion
}
// #region ORT Training APIs
_OrtTrainingLoadCheckpoint?(dataOffset: number, dataLength: number): number;
export interface OrtTrainingAPIs {
_OrtTrainingLoadCheckpoint(dataOffset: number, dataLength: number): number;
_OrtTrainingReleaseCheckpoint?(checkpointHandle: number): void;
_OrtTrainingReleaseCheckpoint(checkpointHandle: number): void;
_OrtTrainingCreateSession?
(sessionOptionsHandle: number, checkpointHandle: number, trainOffset: number, trainLength: number,
evalOffset: number, evalLength: number, optimizerOffset: number, optimizerLength: number): number;
_OrtTrainingCreateSession(
sessionOptionsHandle: number, checkpointHandle: number, trainOffset: number, trainLength: number,
evalOffset: number, evalLength: number, optimizerOffset: number, optimizerLength: number): number;
_OrtTrainingLazyResetGrad?(trainingHandle: number): number;
_OrtTrainingLazyResetGrad(trainingHandle: number): number;
_OrtTrainingRunTrainStep?
(trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
runOptionsHandle: number): number;
_OrtTrainingRunTrainStep(
trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
runOptionsHandle: number): number;
_OrtTrainingOptimizerStep?(trainingHandle: number, runOptionsHandle: number): number;
_OrtTrainingOptimizerStep(trainingHandle: number, runOptionsHandle: number): number;
_OrtTrainingEvalStep?
(trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
runOptionsHandle: number): number;
_OrtTrainingEvalStep(
trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
runOptionsHandle: number): number;
_OrtTrainingGetParametersSize?(trainingHandle: number, paramSizeT: number, trainableOnly: boolean): number;
_OrtTrainingGetParametersSize(trainingHandle: number, paramSizeT: number, trainableOnly: boolean): number;
_OrtTrainingCopyParametersToBuffer?
(trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
_OrtTrainingCopyParametersToBuffer(
trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
_OrtTrainingCopyParametersFromBuffer?
(trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
_OrtTrainingCopyParametersFromBuffer(
trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
_OrtTrainingGetModelInputOutputCount?
(trainingHandle: number, inputCount: number, outputCount: number, isEvalModel: boolean): number;
_OrtTrainingGetModelInputOutputName?
(trainingHandle: number, index: number, isInput: boolean, isEvalModel: boolean): number;
_OrtTrainingGetModelInputOutputCount(
trainingHandle: number, inputCount: number, outputCount: number, isEvalModel: boolean): number;
_OrtTrainingGetModelInputOutputName(trainingHandle: number, index: number, isInput: boolean, isEvalModel: boolean):
number;
_OrtTrainingReleaseSession?(trainingHandle: number): void;
_OrtTrainingReleaseSession(trainingHandle: number): void;
}
export interface OrtWasmModule extends EmscriptenModule, OrtInferenceAPIs, Partial<OrtTrainingAPIs>,
Partial<JSEP.Module> {
// #region emscripten functions
stackSave(): number;
stackRestore(stack: number): void;
stackAlloc(size: number): number;
UTF8ToString(offset: number, maxBytesToRead?: number): string;
lengthBytesUTF8(str: string): number;
stringToUTF8(str: string, offset: number, maxBytes: number): void;
// #endregion
// #region config
numThreads?: number;
mainScriptUrlOrBlob?: string|Blob;
// #endregion
// #region external data API
mountExternalData?(externalDataFilePath: string, externalDataFileData: Uint8Array): void;
unmountExternalData?(): void;
// #endregion
// #region JSEP
/**
* This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime.
* This function initializes WebGPU backend and registers a few callbacks that will be called in C++ code.
*/
jsepInit?
(backend: JSEP.BackendType, alloc: JSEP.AllocFunction, free: JSEP.FreeFunction, upload: JSEP.UploadFunction,
download: JSEP.DownloadFunction, createKernel: JSEP.CreateKernelFunction,
releaseKernel: JSEP.ReleaseKernelFunction, run: JSEP.RunFunction, captureBegin: JSEP.CaptureBeginFunction,
captureEnd: JSEP.CaptureEndFunction, replay: JSEP.ReplayFunction): void;
/**
* [exported from wasm] Specify a kernel's output when running OpKernel::Compute().
*
* @param context - specify the kernel context pointer.
* @param index - specify the index of the output.
* @param data - specify the pointer to encoded data of type and dims.
*/
_JsepOutput(context: number, index: number, data: number): number;
/**
* [exported from wasm] Get name of an operator node.
*
* @param kernel - specify the kernel pointer.
* @returns the pointer to a C-style UTF8 encoded string representing the node name.
*/
_JsepGetNodeName(kernel: number): number;
/**
* [exported from js_internal_api.js] Register a user GPU buffer for usage of a session's input or output.
*
* @param sessionId - specify the session ID.
* @param index - specify an integer to represent which input/output it is registering for. For input, it is the
* input_index corresponding to the session's inputNames. For output, it is the inputCount + output_index
* corresponding to the session's ouputNames.
* @param buffer - specify the GPU buffer to register.
* @param size - specify the original data size in byte.
* @returns the GPU data ID for the registered GPU buffer.
*/
jsepRegisterBuffer: (sessionId: number, index: number, buffer: GPUBuffer, size: number) => number;
/**
* [exported from js_internal_api.js] Get the GPU buffer by GPU data ID.
*
* @param dataId - specify the GPU data ID
* @returns the GPU buffer.
*/
jsepGetBuffer: (dataId: number) => GPUBuffer;
/**
* [exported from js_internal_api.js] Create a function to be used to create a GPU Tensor.
*
* @param gpuBuffer - specify the GPU buffer
* @param size - specify the original data size in byte.
* @param type - specify the tensor type.
* @returns the generated downloader function.
*/
jsepCreateDownloader:
(gpuBuffer: GPUBuffer, size: number,
type: Tensor.GpuBufferDataTypes) => () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
/**
* [exported from js_internal_api.js] Called when InferenceSession.run started. This function will be called before
* _OrtRun[WithBinding]() is called.
* @param sessionId - specify the session ID.
*/
jsepOnRunStart: (sessionId: number) => void;
/**
* [exported from js_internal_api.js] Release a session. This function will be called before _OrtReleaseSession() is
* called.
* @param sessionId - specify the session ID.
* @returns
*/
jsepOnReleaseSession: (sessionId: number) => void;
// #endregion
}
declare const moduleFactory: EmscriptenModuleFactory<OrtWasmModule>;

View file

@ -121,7 +121,7 @@ class ComputeContextImpl implements ComputeContext {
for (let i = 0; i < dims.length; i++) {
this.module.HEAPU32[offset++] = dims[i];
}
return this.module._JsepOutput(this.opKernelContext, index, data);
return this.module._JsepOutput!(this.opKernelContext, index, data);
} catch (e) {
throw new Error(
`Failed to generate kernel's output[${index}] with dims [${dims}]. ` +
@ -136,27 +136,39 @@ class ComputeContextImpl implements ComputeContext {
/**
* Initialize JSEP with WebGPU backend.
*
* This function will be called only once after the WebAssembly module is loaded and initialized ("_OrtInit" is called).
* This function expects:
* This function will be called after the WebAssembly module is loaded and initialized ("_OrtInit" is called), once for
* each of the following EPs if they are specified:
* - "webgpu"
* - "webnn"
*
* For WebGPU, this function expects:
* - WebGPU is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
* - WebGPU is available in current environment. (a valid GPUAdapter is passed in)
* If the WebAssembly module is not built with JSEP support, this function will throw an error. This will invalidate
* 'webgpu' backend.
*
* For WebNN, this function expects:
* - WebNN is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
* - WebNN is available in current environment. (navigator.ml is not undefined)
*
* If the WebAssembly module is not built with JSEP support, this function will throw an error. This will invalidate
* 'webgpu'/'webnn' backend.
*
* @param name - the name of the EP, either "webgpu" or "webnn"
* @param module - the ORT WebAssembly module
* @param env - the ORT environment variable (ort.env)
* @param gpuAdapter - the pre-created GPU adapter
*/
export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapter): Promise<void> => {
export const init =
async(name: 'webgpu'|'webnn', module: OrtWasmModule, env: Env, gpuAdapter?: GPUAdapter): Promise<void> => {
const jsepInit = module.jsepInit;
if (!jsepInit) {
throw new Error('Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.');
}
const backend = new WebGpuBackend();
await backend.initialize(env, gpuAdapter);
if (name === 'webgpu') {
const backend = new WebGpuBackend();
await backend.initialize(env, gpuAdapter!);
jsepInit(
jsepInit('webgpu', [
// backend
backend,
@ -190,8 +202,8 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
},
// jsepCreateKernel
(kernelType: string, kernelId: number, attribute: unknown) =>
backend.createKernel(kernelType, kernelId, attribute, module.UTF8ToString(module._JsepGetNodeName(kernelId))),
(kernelType: string, kernelId: number, attribute: unknown) => backend.createKernel(
kernelType, kernelId, attribute, module.UTF8ToString(module._JsepGetNodeName!(kernelId))),
// jsepReleaseKernel
(kernel: number) => backend.releaseKernel(kernel),
@ -210,5 +222,9 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
// jsepCaptureEnd
() => backend.captureEnd(),
// jsepReplay
() => backend.replay());
() => backend.replay()
]);
} else {
jsepInit('webnn');
}
};

View file

@ -155,7 +155,7 @@ export const createSession =
ensureWorker();
return new Promise<SerializableSessionMetadata>((resolve, reject) => {
enqueueCallbacks('create', [resolve, reject]);
const message: OrtWasmMessage = {type: 'create', in : {model, options}};
const message: OrtWasmMessage = {type: 'create', in : {model, options: {...options}}};
const transferable: Transferable[] = [];
if (model instanceof Uint8Array) {
transferable.push(model.buffer);

View file

@ -84,35 +84,44 @@ export const initRuntime = async(env: Env): Promise<void> => {
* @param epName
*/
export const initEp = async(env: Env, epName: string): Promise<void> => {
if (!BUILD_DEFS.DISABLE_WEBGPU && (epName === 'webgpu' || epName === 'webnn')) {
// perform WebGPU availability check
if (typeof navigator === 'undefined' || !navigator.gpu) {
throw new Error('WebGPU is not supported in current environment');
}
const powerPreference = env.webgpu?.powerPreference;
if (powerPreference !== undefined && powerPreference !== 'low-power' && powerPreference !== 'high-performance') {
throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);
}
const forceFallbackAdapter = env.webgpu?.forceFallbackAdapter;
if (forceFallbackAdapter !== undefined && typeof forceFallbackAdapter !== 'boolean') {
throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);
}
const adapter = await navigator.gpu.requestAdapter({powerPreference, forceFallbackAdapter});
if (!adapter) {
throw new Error(
'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.');
}
if (!env.wasm.simd) {
throw new Error(
'Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP');
}
// init JSEP if available
if (!BUILD_DEFS.DISABLE_WEBGPU) {
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
const initJsep = require('./jsep/init').init;
await initJsep(getInstance(), env, adapter);
if (epName === 'webgpu') {
// perform WebGPU availability check
if (typeof navigator === 'undefined' || !navigator.gpu) {
throw new Error('WebGPU is not supported in current environment');
}
const powerPreference = env.webgpu?.powerPreference;
if (powerPreference !== undefined && powerPreference !== 'low-power' && powerPreference !== 'high-performance') {
throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);
}
const forceFallbackAdapter = env.webgpu?.forceFallbackAdapter;
if (forceFallbackAdapter !== undefined && typeof forceFallbackAdapter !== 'boolean') {
throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);
}
const adapter = await navigator.gpu.requestAdapter({powerPreference, forceFallbackAdapter});
if (!adapter) {
throw new Error(
'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.');
}
if (!env.wasm.simd) {
throw new Error(
'Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP');
}
await initJsep('webgpu', getInstance(), env, adapter);
}
if (epName === 'webnn') {
// perform WebNN availability check
if (typeof navigator === 'undefined' || !(navigator as unknown as {ml: unknown}).ml) {
throw new Error('WebNN is not supported in current environment');
}
await initJsep('webnn', getInstance(), env);
}
}
};
@ -380,7 +389,12 @@ export const prepareInputOutputTensor =
const gpuBuffer = tensor[2].gpuBuffer as GPUBuffer;
const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType))!;
dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
rawData = wasm.jsepRegisterBuffer(sessionId, index, gpuBuffer, dataByteLength);
const registerBuffer = wasm.jsepRegisterBuffer;
if (!registerBuffer) {
throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');
}
rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength);
} else {
const data = tensor[2];
@ -595,7 +609,11 @@ export const run = async(
// If a certain output's preferred location is GPU but the tensor is empty, we still need to create a CPU
// tensor for it. There is no mapping GPU buffer for an empty tensor.
if (preferredLocation === 'gpu-buffer' && size > 0) {
const gpuBuffer = wasm.jsepGetBuffer(dataOffset);
const getBuffer = wasm.jsepGetBuffer;
if (!getBuffer) {
throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');
}
const gpuBuffer = getBuffer(dataOffset);
const elementSize = getTensorElementSize(dataType);
if (elementSize === undefined || !isGpuBufferSupportedType(type)) {
throw new Error(`Unsupported data type: ${type}`);
@ -607,7 +625,7 @@ export const run = async(
output.push([
type, dims, {
gpuBuffer,
download: wasm.jsepCreateDownloader(gpuBuffer, size * elementSize, type),
download: wasm.jsepCreateDownloader!(gpuBuffer, size * elementSize, type),
dispose: () => {
wasm._OrtReleaseTensor(tensor);
}

View file

@ -4,39 +4,27 @@
'use strict';
/**
* Mount external data files of a model to the virtual file system (MEMFS).
* Mount external data files of a model to an internal map, which will be used during session initialization.
*
* @param {string} externalDataFilesPath
* @param {Uint8Array} externalDataFilesData
*/
Module['mountExternalData'] = (externalDataFilePath, externalDataFileData) => {
  // Create the path -> data map lazily on the first mount and reuse it afterwards.
  const files = Module.MountedFiles || (Module.MountedFiles = new Map());
  // Mounting the same path again replaces the previously stored data.
  files.set(externalDataFilePath, externalDataFileData);
};
/**
* Unmount external data files of a model from the virtual file system (MEMFS).
* Unmount external data files of a model.
*/
Module['unmountExternalData'] = () => {
// Drop the whole map at once; mountExternalData creates a fresh one on its next call.
delete Module.MountedFiles;
};
/**
* init JSEP
* initialize JSEP for asyncify support.
*/
Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, releaseKernel, runKernel, captureBegin, captureEnd, replay) => {
Module.jsepBackend = backend;
Module.jsepAlloc = alloc;
Module.jsepFree = free;
Module.jsepCopy = copy;
Module.jsepCopyAsync = copyAsync;
Module.jsepCreateKernel = createKernel;
Module.jsepReleaseKernel = releaseKernel;
Module.jsepRunKernel = runKernel;
Module.jsepCaptureBegin = captureBegin;
Module.jsepCaptureEnd = captureEnd;
Module.jsepReplay = replay;
let jsepInitAsync = () => {
// This is a simplified version of cwrap() with options.async === true (-sASYNCIFY=1)
// It removes some overhead in cwarp() and ccall() that we don't need.
//
@ -143,7 +131,7 @@ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, relea
}
// Flush the backend. This will submit all pending commands to the GPU.
backend['flush']();
Module.jsepBackend?.['flush']();
// Await all pending promises. This includes GPU validation promises for diagnostic purposes.
const errorPromises = state.errors;
@ -180,20 +168,46 @@ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, relea
() => Module['_OrtBindInput'],
v => Module['_OrtBindInput'] = v);
// expose webgpu backend functions
Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => {
return backend['registerBuffer'](sessionId, index, buffer, size);
};
Module['jsepGetBuffer'] = (dataId) => {
return backend['getBuffer'](dataId);
};
Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => {
return backend['createDownloader'](gpuBuffer, size, type);
};
Module['jsepOnReleaseSession'] = sessionId => {
backend['onReleaseSession'](sessionId);
};
Module['jsepOnRunStart'] = sessionId => {
return backend['onRunStart'](sessionId);
};
// remove this function to make sure it is called only once.
jsepInitAsync = undefined;
};
/**
* initialize JSEP for WebGPU.
*/
Module['jsepInit'] = (name, params) => {
// Run the Asyncify setup if it has not run yet: jsepInitAsync resets itself to undefined at the end of its
// body, so the optional call is a no-op on every later invocation.
jsepInitAsync?.();
// Only 'webgpu' carries init params and installs backend functions; for any other name nothing more is done.
if (name === 'webgpu') {
// Unpack the init params positionally: the backend object first, then the callbacks.
[Module.jsepBackend,
Module.jsepAlloc,
Module.jsepFree,
Module.jsepCopy,
Module.jsepCopyAsync,
Module.jsepCreateKernel,
Module.jsepReleaseKernel,
Module.jsepRunKernel,
Module.jsepCaptureBegin,
Module.jsepCaptureEnd,
Module.jsepReplay] = params;
// expose webgpu backend functions
// Each function below forwards its arguments to the same-purpose method on the backend object.
const backend = Module.jsepBackend;
Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => {
return backend['registerBuffer'](sessionId, index, buffer, size);
};
Module['jsepGetBuffer'] = (dataId) => {
return backend['getBuffer'](dataId);
};
Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => {
return backend['createDownloader'](gpuBuffer, size, type);
};
Module['jsepOnReleaseSession'] = sessionId => {
backend['onReleaseSession'](sessionId);
};
Module['jsepOnRunStart'] = sessionId => {
return backend['onRunStart'](sessionId);
};
}
};