mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-12 00:59:23 +00:00
[js/web] rewrite backend resolve to allow multiple EPs (#19735)
### Description

This PR rewrites the backend resolve logic to support specifying multiple EPs.

#### Backend

The first version of ONNX Runtime Web carried over some existing code from [ONNX.js](https://github.com/microsoft/onnxjs), which includes the "backend" concept. The original "backend" in ONNX.js was designed on the assumption that only one backend from the user's backend hint list will be used. For example, in ONNX.js, if the user specifies a backend hint list of `['webgl', 'wasm']`, ONNX.js will first try to use the WebGL backend - if it loads successfully (the browser supports WebGL), then the "webgl" backend will be used and "wasm" will be ignored; otherwise, "webgl" will be ignored and ONNX.js will try to load the "wasm" backend. In short: only one backend will be used when initializing a session.

#### Execution Provider

Execution Provider, or EP, in ONNX Runtime is a different concept. One of the differences is that users are allowed to specify multiple EPs, and if one does not support a particular kernel, it can fall back to another EP. This is a very common case when using a GPU EP in ONNX Runtime.

#### Current Status: Backend vs. EP

For the historical reasons mentioned above, the current status is quite confusing. There are **real backend**s, which are distinct implementations in code; there are **backend hint**s, which are the string names used to select a backend; and there are **EP**s, which are the ONNX Runtime concept. Currently there are only 2 **backend**s in our code base: the "onnxjs backend" and the "wasm backend". The "onnxjs backend" currently only powers the backend hint "webgl", which goes into the old onnx.js code path. All other backend hints, including "wasm", "cpu" (alias of wasm), "webgpu" and "webnn", are powered by the "wasm backend". And because ORT Web treats "backend" as an internal concept and wants to align with ONNX Runtime, the names of those backend hints are becoming EP names.
The following table shows today's status:

| Execution Provider Name (public) / Backend Hint (internal) | Backend | EP in ORT |
| -------- | ------- | ------- |
| "wasm"/"cpu" | WasmBackend | CPU EP |
| "webgl" | OnnxjsBackend | \* technically not an EP |
| "webgpu" | WasmBackend | JSEP |
| "webnn" | WasmBackend | WebNN EP |

#### Problem

While the API allows specifying multiple EPs, backend resolution only allows one backend. This causes issues when the user specifies multiple EP names in session options: the backend resolve behavior and the EP registration behavior are inconsistent. Specifically, in this issue: https://github.com/microsoft/onnxruntime/issues/15796#issuecomment-1925363908: the EP list `['webgpu', 'wasm']` on a browser without WebGPU support resolves to the 'wasm' backend, but the full EP list is passed in session options, so JSEP is still enabled, causing the runtime error.

#### Solution

Since we still need the WebGL backend, we cannot totally remove the backend register/resolve system. In this PR I made the following changes:

- initialize every backend from the EP list, instead of only doing that for the first successful one.
- for the first resolved backend, keep all EPs that use the exact same backend, and remove all EPs not using this backend from session options.
- for every explicitly specified EP, if it is removed, show a warning message in the console.
This commit is contained in:
parent
0b2a75b274
commit
79e50aeef3
8 changed files with 351 additions and 235 deletions
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
import {Backend} from './backend.js';
|
||||
import {InferenceSession} from './inference-session.js';
|
||||
|
||||
interface BackendInfo {
|
||||
backend: Backend;
|
||||
|
|
@ -10,6 +11,7 @@ interface BackendInfo {
|
|||
initPromise?: Promise<void>;
|
||||
initialized?: boolean;
|
||||
aborted?: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
const backends: Map<string, BackendInfo> = new Map();
|
||||
|
|
@ -60,43 +62,100 @@ export const registerBackend = (name: string, backend: Backend, priority: number
|
|||
};
|
||||
|
||||
/**
|
||||
* Resolve backend by specified hints.
|
||||
* Try to resolve and initialize a backend.
|
||||
*
|
||||
* @param backendHints - a list of execution provider names to lookup. If omitted use registered backends as list.
|
||||
* @returns a promise that resolves to the backend.
|
||||
* @param backendName - the name of the backend.
|
||||
* @returns the backend instance if resolved and initialized successfully, or an error message if failed.
|
||||
*/
|
||||
const tryResolveAndInitializeBackend = async(backendName: string): Promise<Backend|string> => {
  const info = backends.get(backendName);

  // Nothing registered under this name.
  if (!info) {
    return 'backend not found.';
  }

  // A previous call already finished initialization successfully.
  if (info.initialized) {
    return info.backend;
  }

  // A previous call already failed; report the recorded error text.
  if (info.aborted) {
    return info.error!;
  }

  // When an init promise is already stored, another call started the initialization and this call only waits on it.
  const initAlreadyInFlight = !!info.initPromise;
  try {
    if (!initAlreadyInFlight) {
      info.initPromise = info.backend.init(backendName);
    }
    await info.initPromise;
    info.initialized = true;
    return info.backend;
  } catch (e) {
    // Only the call that started the initialization records the failure; waiting calls just read what was recorded.
    if (!initAlreadyInFlight) {
      info.error = `${e}`;
      info.aborted = true;
    }
    return info.error!;
  } finally {
    // Drop the stored promise on success and on failure alike.
    delete info.initPromise;
  }
};
|
||||
|
||||
/**
|
||||
* Resolve execution providers from the specific session options.
|
||||
*
|
||||
* @param options - the session options object.
|
||||
* @returns a promise that resolves to a tuple of an initialized backend instance and a session options object with
|
||||
* filtered EP list.
|
||||
*
|
||||
* @ignore
|
||||
*/
|
||||
export const resolveBackend = async(backendHints: readonly string[]): Promise<Backend> => {
|
||||
const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
|
||||
const errors = [];
|
||||
for (const backendName of backendNames) {
|
||||
const backendInfo = backends.get(backendName);
|
||||
if (backendInfo) {
|
||||
if (backendInfo.initialized) {
|
||||
return backendInfo.backend;
|
||||
} else if (backendInfo.aborted) {
|
||||
continue; // current backend is unavailable; try next
|
||||
export const resolveBackendAndExecutionProviders = async(options: InferenceSession.SessionOptions):
|
||||
Promise<[backend: Backend, options: InferenceSession.SessionOptions]> => {
|
||||
// extract backend hints from session options
|
||||
const eps = options.executionProviders || [];
|
||||
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
|
||||
const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
|
||||
|
||||
// try to resolve and initialize all requested backends
|
||||
let backend: Backend|undefined;
|
||||
const errors = [];
|
||||
const availableBackendNames = new Set<string>();
|
||||
for (const backendName of backendNames) {
|
||||
const resolveResult = await tryResolveAndInitializeBackend(backendName);
|
||||
if (typeof resolveResult === 'string') {
|
||||
errors.push({name: backendName, err: resolveResult});
|
||||
} else {
|
||||
if (!backend) {
|
||||
backend = resolveResult;
|
||||
}
|
||||
if (backend === resolveResult) {
|
||||
availableBackendNames.add(backendName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const isInitializing = !!backendInfo.initPromise;
|
||||
try {
|
||||
if (!isInitializing) {
|
||||
backendInfo.initPromise = backendInfo.backend.init(backendName);
|
||||
}
|
||||
await backendInfo.initPromise;
|
||||
backendInfo.initialized = true;
|
||||
return backendInfo.backend;
|
||||
} catch (e) {
|
||||
if (!isInitializing) {
|
||||
errors.push({name: backendName, err: e});
|
||||
}
|
||||
backendInfo.aborted = true;
|
||||
} finally {
|
||||
delete backendInfo.initPromise;
|
||||
// if no backend is available, throw error.
|
||||
if (!backend) {
|
||||
throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`no available backend found. ERR: ${errors.map(e => `[${e.name}] ${e.err}`).join(', ')}`);
|
||||
};
|
||||
// for each explicitly requested backend, if it's not available, output warning message.
|
||||
for (const {name, err} of errors) {
|
||||
if (backendHints.includes(name)) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(`removing requested execution provider "${
|
||||
name}" from session options because it is not available: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
const filteredEps = eps.filter(i => availableBackendNames.has(typeof i === 'string' ? i : i.name));
|
||||
|
||||
return [
|
||||
backend, new Proxy(options, {
|
||||
get: (target, prop) => {
|
||||
if (prop === 'executionProviders') {
|
||||
return filteredEps;
|
||||
}
|
||||
return Reflect.get(target, prop);
|
||||
}
|
||||
})
|
||||
];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
import {resolveBackend} from './backend-impl.js';
|
||||
import {resolveBackendAndExecutionProviders} from './backend-impl.js';
|
||||
import {InferenceSessionHandler} from './backend.js';
|
||||
import {InferenceSession as InferenceSessionInterface} from './inference-session.js';
|
||||
import {OnnxValue} from './onnx-value.js';
|
||||
|
|
@ -195,11 +195,9 @@ export class InferenceSession implements InferenceSessionInterface {
|
|||
throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
|
||||
}
|
||||
|
||||
// get backend hints
|
||||
const eps = options.executionProviders || [];
|
||||
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
|
||||
const backend = await resolveBackend(backendHints);
|
||||
const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, options);
|
||||
// resolve backend, update session options with validated EPs, and create session handler
|
||||
const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
|
||||
const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, optionsWithValidatedEPs);
|
||||
TRACE_FUNC_END();
|
||||
return new InferenceSession(handler);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
import {resolveBackend} from './backend-impl.js';
|
||||
import {resolveBackendAndExecutionProviders} from './backend-impl.js';
|
||||
import {SessionHandler, TrainingSessionHandler} from './backend.js';
|
||||
import {InferenceSession as InferenceSession} from './inference-session.js';
|
||||
import {OnnxValue} from './onnx-value.js';
|
||||
|
|
@ -55,13 +55,12 @@ export class TrainingSession implements TrainingSessionInterface {
|
|||
const optimizerModel: string|Uint8Array = trainingOptions.optimizerModel || '';
|
||||
const options: SessionOptions = sessionOptions || {};
|
||||
|
||||
// get backend hints
|
||||
const eps = options.executionProviders || [];
|
||||
const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
|
||||
const backend = await resolveBackend(backendHints);
|
||||
// resolve backend, update session options with validated EPs, and create session handler
|
||||
const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
|
||||
if (backend.createTrainingSessionHandler) {
|
||||
const handler = await backend.createTrainingSessionHandler(
|
||||
trainingOptions.checkpointState, trainingOptions.trainModel, evalModel, optimizerModel, options);
|
||||
trainingOptions.checkpointState, trainingOptions.trainModel, evalModel, optimizerModel,
|
||||
optionsWithValidatedEPs);
|
||||
return new TrainingSession(handler, !!trainingOptions.optimizerModel, !!trainingOptions.evalModel);
|
||||
} else {
|
||||
throw new Error(noBackendErrMsg);
|
||||
|
|
|
|||
240
js/web/lib/wasm/binding/ort-wasm.d.ts
vendored
240
js/web/lib/wasm/binding/ort-wasm.d.ts
vendored
|
|
@ -16,20 +16,97 @@ export declare namespace JSEP {
|
|||
type CaptureBeginFunction = () => void;
|
||||
type CaptureEndFunction = () => void;
|
||||
type ReplayFunction = () => void;
|
||||
|
||||
export interface Module extends WebGpuModule {
|
||||
/**
|
||||
* Mount the external data file to an internal map, which will be used during session initialization.
|
||||
*
|
||||
* @param externalDataFilePath - specify the relative path of the external data file.
|
||||
* @param externalDataFileData - specify the content data.
|
||||
*/
|
||||
mountExternalData(externalDataFilePath: string, externalDataFileData: Uint8Array): void;
|
||||
/**
|
||||
* Unmount all external data files from the internal map.
|
||||
*/
|
||||
unmountExternalData(): void;
|
||||
|
||||
/**
|
||||
* This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime per
|
||||
* backend. This function initializes Asyncify support. If name is 'webgpu', also initializes WebGPU backend and
|
||||
* registers a few callbacks that will be called in C++ code.
|
||||
*/
|
||||
jsepInit(name: 'webgpu', initParams: [
|
||||
backend: BackendType, alloc: AllocFunction, free: FreeFunction, upload: UploadFunction,
|
||||
download: DownloadFunction, createKernel: CreateKernelFunction, releaseKernel: ReleaseKernelFunction,
|
||||
run: RunFunction, captureBegin: CaptureBeginFunction, captureEnd: CaptureEndFunction, replay: ReplayFunction
|
||||
]): void;
|
||||
jsepInit(name: 'webnn', initParams?: never): void;
|
||||
}
|
||||
|
||||
export interface WebGpuModule {
|
||||
/**
|
||||
* [exported from wasm] Specify a kernel's output when running OpKernel::Compute().
|
||||
*
|
||||
* @param context - specify the kernel context pointer.
|
||||
* @param index - specify the index of the output.
|
||||
* @param data - specify the pointer to encoded data of type and dims.
|
||||
*/
|
||||
_JsepOutput(context: number, index: number, data: number): number;
|
||||
/**
|
||||
* [exported from wasm] Get name of an operator node.
|
||||
*
|
||||
* @param kernel - specify the kernel pointer.
|
||||
* @returns the pointer to a C-style UTF8 encoded string representing the node name.
|
||||
*/
|
||||
_JsepGetNodeName(kernel: number): number;
|
||||
|
||||
/**
|
||||
* [exported from js_internal_api.js] Register a user GPU buffer for usage of a session's input or output.
|
||||
*
|
||||
* @param sessionId - specify the session ID.
|
||||
* @param index - specify an integer to represent which input/output it is registering for. For input, it is the
|
||||
* input_index corresponding to the session's inputNames. For output, it is the inputCount + output_index
|
||||
* corresponding to the session's outputNames.
|
||||
* @param buffer - specify the GPU buffer to register.
|
||||
* @param size - specify the original data size in byte.
|
||||
* @returns the GPU data ID for the registered GPU buffer.
|
||||
*/
|
||||
jsepRegisterBuffer: (sessionId: number, index: number, buffer: GPUBuffer, size: number) => number;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Get the GPU buffer by GPU data ID.
|
||||
*
|
||||
* @param dataId - specify the GPU data ID
|
||||
* @returns the GPU buffer.
|
||||
*/
|
||||
jsepGetBuffer: (dataId: number) => GPUBuffer;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Create a function to be used to create a GPU Tensor.
|
||||
*
|
||||
* @param gpuBuffer - specify the GPU buffer
|
||||
* @param size - specify the original data size in byte.
|
||||
* @param type - specify the tensor type.
|
||||
* @returns the generated downloader function.
|
||||
*/
|
||||
jsepCreateDownloader:
|
||||
(gpuBuffer: GPUBuffer, size: number,
|
||||
type: Tensor.GpuBufferDataTypes) => () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Called when InferenceSession.run started. This function will be called before
|
||||
* _OrtRun[WithBinding]() is called.
|
||||
* @param sessionId - specify the session ID.
|
||||
*/
|
||||
jsepOnRunStart: (sessionId: number) => void;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Release a session. This function will be called before _OrtReleaseSession() is
|
||||
* called.
|
||||
* @param sessionId - specify the session ID.
|
||||
* @returns
|
||||
*/
|
||||
jsepOnReleaseSession: (sessionId: number) => void;
|
||||
}
|
||||
}
|
||||
|
||||
export interface OrtWasmModule extends EmscriptenModule {
|
||||
// #region emscripten functions
|
||||
stackSave(): number;
|
||||
stackRestore(stack: number): void;
|
||||
stackAlloc(size: number): number;
|
||||
|
||||
UTF8ToString(offset: number, maxBytesToRead?: number): string;
|
||||
lengthBytesUTF8(str: string): number;
|
||||
stringToUTF8(str: string, offset: number, maxBytes: number): void;
|
||||
// #endregion
|
||||
|
||||
// #region ORT APIs
|
||||
export interface OrtInferenceAPIs {
|
||||
_OrtInit(numThreads: number, loggingLevel: number): number;
|
||||
|
||||
_OrtGetLastError(errorCodeOffset: number, errorMessageOffset: number): void;
|
||||
|
|
@ -74,126 +151,61 @@ export interface OrtWasmModule extends EmscriptenModule {
|
|||
_OrtReleaseRunOptions(runOptionsHandle: number): void;
|
||||
|
||||
_OrtEndProfiling(sessionHandle: number): number;
|
||||
// #endregion
|
||||
}
|
||||
|
||||
// #region ORT Training APIs
|
||||
_OrtTrainingLoadCheckpoint?(dataOffset: number, dataLength: number): number;
|
||||
export interface OrtTrainingAPIs {
|
||||
_OrtTrainingLoadCheckpoint(dataOffset: number, dataLength: number): number;
|
||||
|
||||
_OrtTrainingReleaseCheckpoint?(checkpointHandle: number): void;
|
||||
_OrtTrainingReleaseCheckpoint(checkpointHandle: number): void;
|
||||
|
||||
_OrtTrainingCreateSession?
|
||||
(sessionOptionsHandle: number, checkpointHandle: number, trainOffset: number, trainLength: number,
|
||||
evalOffset: number, evalLength: number, optimizerOffset: number, optimizerLength: number): number;
|
||||
_OrtTrainingCreateSession(
|
||||
sessionOptionsHandle: number, checkpointHandle: number, trainOffset: number, trainLength: number,
|
||||
evalOffset: number, evalLength: number, optimizerOffset: number, optimizerLength: number): number;
|
||||
|
||||
_OrtTrainingLazyResetGrad?(trainingHandle: number): number;
|
||||
_OrtTrainingLazyResetGrad(trainingHandle: number): number;
|
||||
|
||||
_OrtTrainingRunTrainStep?
|
||||
(trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
|
||||
runOptionsHandle: number): number;
|
||||
_OrtTrainingRunTrainStep(
|
||||
trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
|
||||
runOptionsHandle: number): number;
|
||||
|
||||
_OrtTrainingOptimizerStep?(trainingHandle: number, runOptionsHandle: number): number;
|
||||
_OrtTrainingOptimizerStep(trainingHandle: number, runOptionsHandle: number): number;
|
||||
|
||||
_OrtTrainingEvalStep?
|
||||
(trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
|
||||
runOptionsHandle: number): number;
|
||||
_OrtTrainingEvalStep(
|
||||
trainingHandle: number, inputsOffset: number, inputCount: number, outputsOffset: number, outputCount: number,
|
||||
runOptionsHandle: number): number;
|
||||
|
||||
_OrtTrainingGetParametersSize?(trainingHandle: number, paramSizeT: number, trainableOnly: boolean): number;
|
||||
_OrtTrainingGetParametersSize(trainingHandle: number, paramSizeT: number, trainableOnly: boolean): number;
|
||||
|
||||
_OrtTrainingCopyParametersToBuffer?
|
||||
(trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
|
||||
_OrtTrainingCopyParametersToBuffer(
|
||||
trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
|
||||
|
||||
_OrtTrainingCopyParametersFromBuffer?
|
||||
(trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
|
||||
_OrtTrainingCopyParametersFromBuffer(
|
||||
trainingHandle: number, parametersBuffer: number, parameterCount: number, trainableOnly: boolean): number;
|
||||
|
||||
_OrtTrainingGetModelInputOutputCount?
|
||||
(trainingHandle: number, inputCount: number, outputCount: number, isEvalModel: boolean): number;
|
||||
_OrtTrainingGetModelInputOutputName?
|
||||
(trainingHandle: number, index: number, isInput: boolean, isEvalModel: boolean): number;
|
||||
_OrtTrainingGetModelInputOutputCount(
|
||||
trainingHandle: number, inputCount: number, outputCount: number, isEvalModel: boolean): number;
|
||||
_OrtTrainingGetModelInputOutputName(trainingHandle: number, index: number, isInput: boolean, isEvalModel: boolean):
|
||||
number;
|
||||
|
||||
_OrtTrainingReleaseSession?(trainingHandle: number): void;
|
||||
_OrtTrainingReleaseSession(trainingHandle: number): void;
|
||||
}
|
||||
|
||||
export interface OrtWasmModule extends EmscriptenModule, OrtInferenceAPIs, Partial<OrtTrainingAPIs>,
|
||||
Partial<JSEP.Module> {
|
||||
// #region emscripten functions
|
||||
stackSave(): number;
|
||||
stackRestore(stack: number): void;
|
||||
stackAlloc(size: number): number;
|
||||
|
||||
UTF8ToString(offset: number, maxBytesToRead?: number): string;
|
||||
lengthBytesUTF8(str: string): number;
|
||||
stringToUTF8(str: string, offset: number, maxBytes: number): void;
|
||||
// #endregion
|
||||
|
||||
// #region config
|
||||
numThreads?: number;
|
||||
mainScriptUrlOrBlob?: string|Blob;
|
||||
// #endregion
|
||||
|
||||
// #region external data API
|
||||
mountExternalData?(externalDataFilePath: string, externalDataFileData: Uint8Array): void;
|
||||
unmountExternalData?(): void;
|
||||
// #endregion
|
||||
|
||||
// #region JSEP
|
||||
/**
|
||||
* This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime.
|
||||
* This function initializes WebGPU backend and registers a few callbacks that will be called in C++ code.
|
||||
*/
|
||||
jsepInit?
|
||||
(backend: JSEP.BackendType, alloc: JSEP.AllocFunction, free: JSEP.FreeFunction, upload: JSEP.UploadFunction,
|
||||
download: JSEP.DownloadFunction, createKernel: JSEP.CreateKernelFunction,
|
||||
releaseKernel: JSEP.ReleaseKernelFunction, run: JSEP.RunFunction, captureBegin: JSEP.CaptureBeginFunction,
|
||||
captureEnd: JSEP.CaptureEndFunction, replay: JSEP.ReplayFunction): void;
|
||||
|
||||
/**
|
||||
* [exported from wasm] Specify a kernel's output when running OpKernel::Compute().
|
||||
*
|
||||
* @param context - specify the kernel context pointer.
|
||||
* @param index - specify the index of the output.
|
||||
* @param data - specify the pointer to encoded data of type and dims.
|
||||
*/
|
||||
_JsepOutput(context: number, index: number, data: number): number;
|
||||
/**
|
||||
* [exported from wasm] Get name of an operator node.
|
||||
*
|
||||
* @param kernel - specify the kernel pointer.
|
||||
* @returns the pointer to a C-style UTF8 encoded string representing the node name.
|
||||
*/
|
||||
_JsepGetNodeName(kernel: number): number;
|
||||
|
||||
/**
|
||||
* [exported from js_internal_api.js] Register a user GPU buffer for usage of a session's input or output.
|
||||
*
|
||||
* @param sessionId - specify the session ID.
|
||||
* @param index - specify an integer to represent which input/output it is registering for. For input, it is the
|
||||
* input_index corresponding to the session's inputNames. For output, it is the inputCount + output_index
|
||||
* corresponding to the session's ouputNames.
|
||||
* @param buffer - specify the GPU buffer to register.
|
||||
* @param size - specify the original data size in byte.
|
||||
* @returns the GPU data ID for the registered GPU buffer.
|
||||
*/
|
||||
jsepRegisterBuffer: (sessionId: number, index: number, buffer: GPUBuffer, size: number) => number;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Get the GPU buffer by GPU data ID.
|
||||
*
|
||||
* @param dataId - specify the GPU data ID
|
||||
* @returns the GPU buffer.
|
||||
*/
|
||||
jsepGetBuffer: (dataId: number) => GPUBuffer;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Create a function to be used to create a GPU Tensor.
|
||||
*
|
||||
* @param gpuBuffer - specify the GPU buffer
|
||||
* @param size - specify the original data size in byte.
|
||||
* @param type - specify the tensor type.
|
||||
* @returns the generated downloader function.
|
||||
*/
|
||||
jsepCreateDownloader:
|
||||
(gpuBuffer: GPUBuffer, size: number,
|
||||
type: Tensor.GpuBufferDataTypes) => () => Promise<Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]>;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Called when InferenceSession.run started. This function will be called before
|
||||
* _OrtRun[WithBinding]() is called.
|
||||
* @param sessionId - specify the session ID.
|
||||
*/
|
||||
jsepOnRunStart: (sessionId: number) => void;
|
||||
/**
|
||||
* [exported from js_internal_api.js] Release a session. This function will be called before _OrtReleaseSession() is
|
||||
* called.
|
||||
* @param sessionId - specify the session ID.
|
||||
* @returns
|
||||
*/
|
||||
jsepOnReleaseSession: (sessionId: number) => void;
|
||||
// #endregion
|
||||
}
|
||||
|
||||
declare const moduleFactory: EmscriptenModuleFactory<OrtWasmModule>;
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class ComputeContextImpl implements ComputeContext {
|
|||
for (let i = 0; i < dims.length; i++) {
|
||||
this.module.HEAPU32[offset++] = dims[i];
|
||||
}
|
||||
return this.module._JsepOutput(this.opKernelContext, index, data);
|
||||
return this.module._JsepOutput!(this.opKernelContext, index, data);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to generate kernel's output[${index}] with dims [${dims}]. ` +
|
||||
|
|
@ -136,27 +136,39 @@ class ComputeContextImpl implements ComputeContext {
|
|||
/**
|
||||
* Initialize JSEP with WebGPU backend.
|
||||
*
|
||||
* This function will be called only once after the WebAssembly module is loaded and initialized ("_OrtInit" is called).
|
||||
* This function expects:
|
||||
* This function will be called after the WebAssembly module is loaded and initialized ("_OrtInit" is called), once for
|
||||
* each of the following EPs if they are specified:
|
||||
* - "webgpu"
|
||||
* - "webnn"
|
||||
*
|
||||
* For WebGPU, this function expects:
|
||||
* - WebGPU is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
|
||||
* - WebGPU is available in current environment. (a valid GPUAdapter is passed in)
|
||||
* If the WebAssembly module is not built with JSEP support, this function will throw an error. This will invalidate
|
||||
* 'webgpu' backend.
|
||||
*
|
||||
* For WebNN, this function expects:
|
||||
* - WebNN is enabled in build (BUILD_DEFS.DISABLE_WEBGPU === false).
|
||||
* - WebNN is available in current environment. (navigator.ml is not undefined)
|
||||
*
|
||||
* If the WebAssembly module is not built with JSEP support, this function will throw an error. This will invalidate
|
||||
* 'webgpu'/'webnn' backend.
|
||||
*
|
||||
* @param name - the name of the EP, either "webgpu" or "webnn"
|
||||
* @param module - the ORT WebAssembly module
|
||||
* @param env - the ORT environment variable (ort.env)
|
||||
* @param gpuAdapter - the pre-created GPU adapter
|
||||
*/
|
||||
export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapter): Promise<void> => {
|
||||
export const init =
|
||||
async(name: 'webgpu'|'webnn', module: OrtWasmModule, env: Env, gpuAdapter?: GPUAdapter): Promise<void> => {
|
||||
const jsepInit = module.jsepInit;
|
||||
if (!jsepInit) {
|
||||
throw new Error('Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.');
|
||||
}
|
||||
|
||||
const backend = new WebGpuBackend();
|
||||
await backend.initialize(env, gpuAdapter);
|
||||
if (name === 'webgpu') {
|
||||
const backend = new WebGpuBackend();
|
||||
await backend.initialize(env, gpuAdapter!);
|
||||
|
||||
jsepInit(
|
||||
jsepInit('webgpu', [
|
||||
// backend
|
||||
backend,
|
||||
|
||||
|
|
@ -190,8 +202,8 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
|
|||
},
|
||||
|
||||
// jsepCreateKernel
|
||||
(kernelType: string, kernelId: number, attribute: unknown) =>
|
||||
backend.createKernel(kernelType, kernelId, attribute, module.UTF8ToString(module._JsepGetNodeName(kernelId))),
|
||||
(kernelType: string, kernelId: number, attribute: unknown) => backend.createKernel(
|
||||
kernelType, kernelId, attribute, module.UTF8ToString(module._JsepGetNodeName!(kernelId))),
|
||||
|
||||
// jsepReleaseKernel
|
||||
(kernel: number) => backend.releaseKernel(kernel),
|
||||
|
|
@ -210,5 +222,9 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
|
|||
// jsepCaptureEnd
|
||||
() => backend.captureEnd(),
|
||||
// jsepReplay
|
||||
() => backend.replay());
|
||||
() => backend.replay()
|
||||
]);
|
||||
} else {
|
||||
jsepInit('webnn');
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ export const createSession =
|
|||
ensureWorker();
|
||||
return new Promise<SerializableSessionMetadata>((resolve, reject) => {
|
||||
enqueueCallbacks('create', [resolve, reject]);
|
||||
const message: OrtWasmMessage = {type: 'create', in : {model, options}};
|
||||
const message: OrtWasmMessage = {type: 'create', in : {model, options: {...options}}};
|
||||
const transferable: Transferable[] = [];
|
||||
if (model instanceof Uint8Array) {
|
||||
transferable.push(model.buffer);
|
||||
|
|
|
|||
|
|
@ -84,35 +84,44 @@ export const initRuntime = async(env: Env): Promise<void> => {
|
|||
* @param epName
|
||||
*/
|
||||
export const initEp = async(env: Env, epName: string): Promise<void> => {
|
||||
if (!BUILD_DEFS.DISABLE_WEBGPU && (epName === 'webgpu' || epName === 'webnn')) {
|
||||
// perform WebGPU availability check
|
||||
if (typeof navigator === 'undefined' || !navigator.gpu) {
|
||||
throw new Error('WebGPU is not supported in current environment');
|
||||
}
|
||||
const powerPreference = env.webgpu?.powerPreference;
|
||||
if (powerPreference !== undefined && powerPreference !== 'low-power' && powerPreference !== 'high-performance') {
|
||||
throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);
|
||||
}
|
||||
const forceFallbackAdapter = env.webgpu?.forceFallbackAdapter;
|
||||
if (forceFallbackAdapter !== undefined && typeof forceFallbackAdapter !== 'boolean') {
|
||||
throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);
|
||||
}
|
||||
const adapter = await navigator.gpu.requestAdapter({powerPreference, forceFallbackAdapter});
|
||||
if (!adapter) {
|
||||
throw new Error(
|
||||
'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.');
|
||||
}
|
||||
|
||||
if (!env.wasm.simd) {
|
||||
throw new Error(
|
||||
'Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP');
|
||||
}
|
||||
|
||||
// init JSEP if available
|
||||
|
||||
if (!BUILD_DEFS.DISABLE_WEBGPU) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
|
||||
const initJsep = require('./jsep/init').init;
|
||||
await initJsep(getInstance(), env, adapter);
|
||||
|
||||
if (epName === 'webgpu') {
|
||||
// perform WebGPU availability check
|
||||
if (typeof navigator === 'undefined' || !navigator.gpu) {
|
||||
throw new Error('WebGPU is not supported in current environment');
|
||||
}
|
||||
const powerPreference = env.webgpu?.powerPreference;
|
||||
if (powerPreference !== undefined && powerPreference !== 'low-power' && powerPreference !== 'high-performance') {
|
||||
throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);
|
||||
}
|
||||
const forceFallbackAdapter = env.webgpu?.forceFallbackAdapter;
|
||||
if (forceFallbackAdapter !== undefined && typeof forceFallbackAdapter !== 'boolean') {
|
||||
throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);
|
||||
}
|
||||
const adapter = await navigator.gpu.requestAdapter({powerPreference, forceFallbackAdapter});
|
||||
if (!adapter) {
|
||||
throw new Error(
|
||||
'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.');
|
||||
}
|
||||
|
||||
if (!env.wasm.simd) {
|
||||
throw new Error(
|
||||
'Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP');
|
||||
}
|
||||
|
||||
await initJsep('webgpu', getInstance(), env, adapter);
|
||||
}
|
||||
if (epName === 'webnn') {
|
||||
// perform WebNN availability check
|
||||
if (typeof navigator === 'undefined' || !(navigator as unknown as {ml: unknown}).ml) {
|
||||
throw new Error('WebNN is not supported in current environment');
|
||||
}
|
||||
|
||||
await initJsep('webnn', getInstance(), env);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -380,7 +389,12 @@ export const prepareInputOutputTensor =
|
|||
const gpuBuffer = tensor[2].gpuBuffer as GPUBuffer;
|
||||
const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType))!;
|
||||
dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
|
||||
rawData = wasm.jsepRegisterBuffer(sessionId, index, gpuBuffer, dataByteLength);
|
||||
|
||||
const registerBuffer = wasm.jsepRegisterBuffer;
|
||||
if (!registerBuffer) {
|
||||
throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.');
|
||||
}
|
||||
rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength);
|
||||
} else {
|
||||
const data = tensor[2];
|
||||
|
||||
|
|
@ -595,7 +609,11 @@ export const run = async(
|
|||
// If a certain output's preferred location is GPU but the tensor is empty, we still need to create a CPU
|
||||
// tensor for it. There is no mapping GPU buffer for an empty tensor.
|
||||
if (preferredLocation === 'gpu-buffer' && size > 0) {
|
||||
const gpuBuffer = wasm.jsepGetBuffer(dataOffset);
|
||||
const getBuffer = wasm.jsepGetBuffer;
|
||||
if (!getBuffer) {
|
||||
throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');
|
||||
}
|
||||
const gpuBuffer = getBuffer(dataOffset);
|
||||
const elementSize = getTensorElementSize(dataType);
|
||||
if (elementSize === undefined || !isGpuBufferSupportedType(type)) {
|
||||
throw new Error(`Unsupported data type: ${type}`);
|
||||
|
|
@ -607,7 +625,7 @@ export const run = async(
|
|||
output.push([
|
||||
type, dims, {
|
||||
gpuBuffer,
|
||||
download: wasm.jsepCreateDownloader(gpuBuffer, size * elementSize, type),
|
||||
download: wasm.jsepCreateDownloader!(gpuBuffer, size * elementSize, type),
|
||||
dispose: () => {
|
||||
wasm._OrtReleaseTensor(tensor);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,39 +4,27 @@
|
|||
'use strict';
|
||||
|
||||
/**
|
||||
* Mount external data files of a model to the virtual file system (MEMFS).
|
||||
* Mount external data files of a model to an internal map, which will be used during session initialization.
|
||||
*
|
||||
* @param {string} externalDataFilesPath
|
||||
* @param {Uint8Array} externalDataFilesData
|
||||
*/
|
||||
Module['mountExternalData'] = (externalDataFilePath, externalDataFileData) => {
|
||||
const files = Module.MountedFiles || (Module.MountedFiles = new Map());
|
||||
files.set(externalDataFilePath, externalDataFileData);
|
||||
files.set(externalDataFilePath, externalDataFileData);
|
||||
};
|
||||
|
||||
/**
|
||||
* Unmount external data files of a model from the virtual file system (MEMFS).
|
||||
* Unmount external data files of a model.
|
||||
*/
|
||||
Module['unmountExternalData'] = () => {
|
||||
delete Module.MountedFiles;
|
||||
};
|
||||
|
||||
/**
|
||||
* init JSEP
|
||||
* initialize JSEP for asyncify support.
|
||||
*/
|
||||
Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, releaseKernel, runKernel, captureBegin, captureEnd, replay) => {
|
||||
Module.jsepBackend = backend;
|
||||
Module.jsepAlloc = alloc;
|
||||
Module.jsepFree = free;
|
||||
Module.jsepCopy = copy;
|
||||
Module.jsepCopyAsync = copyAsync;
|
||||
Module.jsepCreateKernel = createKernel;
|
||||
Module.jsepReleaseKernel = releaseKernel;
|
||||
Module.jsepRunKernel = runKernel;
|
||||
Module.jsepCaptureBegin = captureBegin;
|
||||
Module.jsepCaptureEnd = captureEnd;
|
||||
Module.jsepReplay = replay;
|
||||
|
||||
let jsepInitAsync = () => {
|
||||
// This is a simplified version of cwrap() with options.async === true (-sASYNCIFY=1)
|
||||
// It removes some overhead in cwrap() and ccall() that we don't need.
|
||||
//
|
||||
|
|
@ -143,7 +131,7 @@ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, relea
|
|||
}
|
||||
|
||||
// Flush the backend. This will submit all pending commands to the GPU.
|
||||
backend['flush']();
|
||||
Module.jsepBackend?.['flush']();
|
||||
|
||||
// Await all pending promises. This includes GPU validation promises for diagnostic purposes.
|
||||
const errorPromises = state.errors;
|
||||
|
|
@ -180,20 +168,46 @@ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, relea
|
|||
() => Module['_OrtBindInput'],
|
||||
v => Module['_OrtBindInput'] = v);
|
||||
|
||||
// expose webgpu backend functions
|
||||
Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => {
|
||||
return backend['registerBuffer'](sessionId, index, buffer, size);
|
||||
};
|
||||
Module['jsepGetBuffer'] = (dataId) => {
|
||||
return backend['getBuffer'](dataId);
|
||||
};
|
||||
Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => {
|
||||
return backend['createDownloader'](gpuBuffer, size, type);
|
||||
};
|
||||
Module['jsepOnReleaseSession'] = sessionId => {
|
||||
backend['onReleaseSession'](sessionId);
|
||||
};
|
||||
Module['jsepOnRunStart'] = sessionId => {
|
||||
return backend['onRunStart'](sessionId);
|
||||
};
|
||||
// remove this function to make sure it is called only once.
|
||||
jsepInitAsync = undefined;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* initialize JSEP for WebGPU.
|
||||
*/
|
||||
Module['jsepInit'] = (name, params) => {
|
||||
jsepInitAsync?.();
|
||||
|
||||
if (name === 'webgpu') {
|
||||
[Module.jsepBackend,
|
||||
Module.jsepAlloc,
|
||||
Module.jsepFree,
|
||||
Module.jsepCopy,
|
||||
Module.jsepCopyAsync,
|
||||
Module.jsepCreateKernel,
|
||||
Module.jsepReleaseKernel,
|
||||
Module.jsepRunKernel,
|
||||
Module.jsepCaptureBegin,
|
||||
Module.jsepCaptureEnd,
|
||||
Module.jsepReplay] = params;
|
||||
|
||||
// expose webgpu backend functions
|
||||
const backend = Module.jsepBackend;
|
||||
Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => {
|
||||
return backend['registerBuffer'](sessionId, index, buffer, size);
|
||||
};
|
||||
Module['jsepGetBuffer'] = (dataId) => {
|
||||
return backend['getBuffer'](dataId);
|
||||
};
|
||||
Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => {
|
||||
return backend['createDownloader'](gpuBuffer, size, type);
|
||||
};
|
||||
Module['jsepOnReleaseSession'] = sessionId => {
|
||||
backend['onReleaseSession'](sessionId);
|
||||
};
|
||||
Module['jsepOnRunStart'] = sessionId => {
|
||||
return backend['onRunStart'](sessionId);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in a new issue