onnxruntime/js/common/lib/inference-session-impl.ts
Yulong Wang 79e50aeef3
[js/web] rewrite backend resolve to allow multiple EPs (#19735)
### Description

This PR rewrites the backend resolution logic to support specifying multiple
EPs.

#### Backend

The first version of ONNX Runtime Web carried over some existing
code from [ONNX.js](https://github.com/microsoft/onnxjs), including
the "backend" concept. The original "backend" in ONNX.js was designed
on the assumption that only one backend from the user's backend hint list
will be used. For example, in ONNX.js, if the user specifies a backend hint of
`['webgl', 'wasm']`, ONNX.js first tries to use the WebGL backend - if it
loads successfully (the browser supports WebGL), the "webgl" backend
is used and "wasm" is ignored; otherwise, "webgl" is
ignored and ONNX.js tries to load the "wasm" backend.

In short: only one backend will be used when initializing a session.

#### Execution Provider

Execution Provider, or EP, in ONNX Runtime is a different concept. One
of the differences is that users are allowed to specify multiple EPs, and
if one does not support a particular kernel, execution can fall back to
another EP. This is a very common case when using a GPU EP in ONNX Runtime.

#### Current Status: Backend v.s. EP

For the historical reasons mentioned above, the current status is
quite confusing. There are **real backend**s, which are distinct
implementations in code; there are **backend hint**s, which are the
string names used to request a backend; and there are **EP**s, which are
an ONNX Runtime concept.

Currently there are only 2 **backend**s in our code base: the "onnxjs
backend" and the "wasm backend". The "onnxjs backend" currently only
powers the backend hint "webgl", which goes into the old onnx.js code path.
All other backend hints, including "wasm", "cpu" (alias of wasm), "webgpu"
and "webnn", are powered by the "wasm backend".

And because ORT Web treats "backend" as an internal concept and wants to
align with ONNX Runtime, the names of those backend hints are becoming EP
names.

The following table shows today's status:

| Execution Provider Name (public) / Backend Hint (internal) | Backend | EP in ORT |
| -------- | ------- | ------- |
| "wasm"/"cpu" | WasmBackend | CPU EP |
| "webgl" | OnnxjsBackend | \* technically not an EP |
| "webgpu" | WasmBackend | JSEP |
| "webnn" | WasmBackend | WebNN EP |

#### Problem

While the API allows specifying multiple EPs, backend resolution only
allows one backend. This causes issues when the user specifies multiple EP
names in session options: the backend resolution behavior and the EP
registration behavior are inconsistent. Specifically, in this issue:
https://github.com/microsoft/onnxruntime/issues/15796#issuecomment-1925363908:

EP list `['webgpu', 'wasm']` on a browser without WebGPU support
resolves to 'wasm' backend, but the full EP list is passed in session
options, so JSEP is still enabled, causing the runtime error.


#### Solution

Since we still need the WebGL backend, we cannot totally remove the backend
register/resolve system. In this PR I made the following changes:
- initialize every backend from the EP list, instead of only doing that for
the first successful one.
- for the first resolved backend, keep only the EPs that use that exact
backend, and remove all EPs not using this backend from session options.
- for every explicitly specified EP that is removed, show a warning
message in the console.
2024-03-15 11:47:45 -07:00

220 lines
8.4 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import {resolveBackendAndExecutionProviders} from './backend-impl.js';
import {InferenceSessionHandler} from './backend.js';
import {InferenceSession as InferenceSessionInterface} from './inference-session.js';
import {OnnxValue} from './onnx-value.js';
import {Tensor} from './tensor.js';
import {TRACE_FUNC_BEGIN, TRACE_FUNC_END} from './trace.js';
// Local shorthands for the types exposed under the `InferenceSession` export of './inference-session.js'
// (imported above as `InferenceSessionInterface`).
type SessionOptions = InferenceSessionInterface.SessionOptions;
type RunOptions = InferenceSessionInterface.RunOptions;
type FeedsType = InferenceSessionInterface.FeedsType;
type FetchesType = InferenceSessionInterface.FetchesType;
// NOTE: inside this module this alias shadows TypeScript's built-in `ReturnType<T>` utility type.
type ReturnType = InferenceSessionInterface.ReturnType;
/**
 * Implementation of the `InferenceSession` interface that validates and normalizes the caller's arguments and
 * then forwards the actual work to an `InferenceSessionHandler`.
 *
 * Instances cannot be constructed directly; use the static {@link InferenceSession.create} factory.
 */
export class InferenceSession implements InferenceSessionInterface {
  /** The handler that performs inference, profiling and disposal on behalf of this session. */
  private readonly handler: InferenceSessionHandler;

  private constructor(handler: InferenceSessionHandler) {
    this.handler = handler;
  }

  run(feeds: FeedsType, options?: RunOptions): Promise<ReturnType>;
  run(feeds: FeedsType, fetches: FetchesType, options?: RunOptions): Promise<ReturnType>;
  /**
   * Validates the arguments, runs the handler and returns the outputs keyed by output name.
   *
   * @param feeds - object whose keys are input names; every name in `inputNames` must be present.
   * @param arg1 - either `fetches` (a non-empty array of output names, or an object keyed by output names whose
   *     values are `null` or a Tensor) or the run options.
   * @param arg2 - the run options; only read when `arg1` was recognized as `fetches`.
   * @returns the handler's results, with any non-Tensor result wrapped in a new `Tensor`.
   * @throws TypeError when an argument has an unexpected type.
   * @throws RangeError when `fetches` names an output that is not in `outputNames`.
   * @throws Error when a required input is missing from `feeds`.
   */
  async run(feeds: FeedsType, arg1?: FetchesType|RunOptions, arg2?: RunOptions): Promise<ReturnType> {
    TRACE_FUNC_BEGIN();
    const fetches: {[name: string]: OnnxValue|null} = {};
    let options: RunOptions = {};

    // check inputs
    if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) {
      throw new TypeError(
          '\'feeds\' must be an object that use input names as keys and OnnxValue as corresponding values.');
    }

    let isFetchesEmpty = true;
    // determine which overload is being used
    if (typeof arg1 === 'object') {
      if (arg1 === null) {
        throw new TypeError('Unexpected argument[1]: cannot be null.');
      }
      if (arg1 instanceof Tensor) {
        throw new TypeError('\'fetches\' cannot be a Tensor');
      }

      if (Array.isArray(arg1)) {
        if (arg1.length === 0) {
          throw new TypeError('\'fetches\' cannot be an empty array.');
        }
        isFetchesEmpty = false;
        // the array form lists output names; each one is requested with a null placeholder
        for (const name of arg1) {
          if (typeof name !== 'string') {
            throw new TypeError('\'fetches\' must be a string array or an object.');
          }
          if (!this.outputNames.includes(name)) {
            throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
          }
          fetches[name] = null;
        }

        if (typeof arg2 === 'object' && arg2 !== null) {
          options = arg2;
        } else if (typeof arg2 !== 'undefined') {
          throw new TypeError('\'options\' must be an object.');
        }
      } else {
        // decide whether arg1 is fetches or options:
        // if any output name is present and its value is null or a Tensor, we consider it fetches
        let isFetches = false;
        const arg1Keys = Object.getOwnPropertyNames(arg1);
        for (const name of this.outputNames) {
          if (arg1Keys.includes(name)) {
            const v = (arg1 as InferenceSessionInterface.NullableOnnxValueMapType)[name];
            if (v === null || v instanceof Tensor) {
              isFetches = true;
              isFetchesEmpty = false;
              fetches[name] = v;
            }
          }
        }

        if (isFetches) {
          if (typeof arg2 === 'object' && arg2 !== null) {
            options = arg2;
          } else if (typeof arg2 !== 'undefined') {
            throw new TypeError('\'options\' must be an object.');
          }
        } else {
          options = arg1 as RunOptions;
        }
      }
    } else if (typeof arg1 !== 'undefined') {
      throw new TypeError('Unexpected argument[1]: must be \'fetches\' or \'options\'.');
    }

    // check if all inputs are in feed
    for (const name of this.inputNames) {
      if (typeof feeds[name] === 'undefined') {
        throw new Error(`input '${name}' is missing in 'feeds'.`);
      }
    }

    // if no fetches is specified, we use the full output names list
    if (isFetchesEmpty) {
      for (const name of this.outputNames) {
        fetches[name] = null;
      }
    }

    // feeds, fetches and options are prepared
    const results = await this.handler.run(feeds, fetches, options);
    const returnValue: {[name: string]: OnnxValue} = {};
    for (const key in results) {
      if (Object.hasOwnProperty.call(results, key)) {
        const result = results[key];
        // results that are not already Tensor instances are wrapped so callers always get this module's Tensor
        if (result instanceof Tensor) {
          returnValue[key] = result;
        } else {
          returnValue[key] = new Tensor(result.type, result.data, result.dims);
        }
      }
    }
    TRACE_FUNC_END();
    return returnValue;
  }

  /** Releases the session by disposing the underlying handler. */
  async release(): Promise<void> {
    return this.handler.dispose();
  }

  static create(path: string, options?: SessionOptions): Promise<InferenceSessionInterface>;
  static create(buffer: ArrayBufferLike, options?: SessionOptions): Promise<InferenceSessionInterface>;
  static create(buffer: ArrayBufferLike, byteOffset: number, byteLength?: number, options?: SessionOptions):
      Promise<InferenceSessionInterface>;
  static create(buffer: Uint8Array, options?: SessionOptions): Promise<InferenceSessionInterface>;
  /**
   * Creates a session from a model path or from model bytes.
   *
   * @param arg0 - a path string, a `Uint8Array`, or an `ArrayBuffer`/`SharedArrayBuffer` holding the model.
   * @param arg1 - the session options, or (buffer form only) the byte offset of the model inside the buffer.
   * @param arg2 - (buffer + offset form) the byte length of the model; defaults to the rest of the buffer.
   * @param arg3 - (buffer + offset form) the session options. They are honored whether or not `byteLength` was
   *     supplied, as the overload signature permits omitting `byteLength` while still passing options.
   * @returns a new session wrapping the handler created by the resolved backend.
   * @throws TypeError when an argument has an unexpected type.
   * @throws RangeError when `byteOffset` or `byteLength` is not an integer or falls outside the buffer.
   */
  static async create(
      arg0: string|ArrayBufferLike|Uint8Array, arg1?: SessionOptions|number, arg2?: number,
      arg3?: SessionOptions): Promise<InferenceSessionInterface> {
    TRACE_FUNC_BEGIN();

    // Single place that validates whichever positional argument carries the session options.
    const toSessionOptions = (candidate: SessionOptions|number|undefined): SessionOptions => {
      if (typeof candidate === 'object' && candidate !== null) {
        return candidate;
      }
      if (typeof candidate !== 'undefined') {
        throw new TypeError('\'options\' must be an object.');
      }
      return {};
    };

    // either load from a file or buffer
    let filePathOrUint8Array: string|Uint8Array;
    let options: SessionOptions;

    if (typeof arg0 === 'string' || arg0 instanceof Uint8Array) {
      filePathOrUint8Array = arg0;
      options = toSessionOptions(arg1);
    } else if (
        arg0 instanceof ArrayBuffer ||
        (typeof SharedArrayBuffer !== 'undefined' && arg0 instanceof SharedArrayBuffer)) {
      const buffer = arg0;
      let byteOffset = 0;
      let byteLength = buffer.byteLength;

      if (typeof arg1 === 'number') {
        byteOffset = arg1;
        if (!Number.isSafeInteger(byteOffset)) {
          throw new RangeError('\'byteOffset\' must be an integer.');
        }
        if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
          throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
        }
        // without an explicit length the model spans from the offset to the end of the buffer
        byteLength = buffer.byteLength - byteOffset;

        if (typeof arg2 === 'number') {
          byteLength = arg2;
          if (!Number.isSafeInteger(byteLength)) {
            throw new RangeError('\'byteLength\' must be an integer.');
          }
          if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
            throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
          }
        } else if (typeof arg2 !== 'undefined') {
          throw new TypeError('\'byteLength\' must be a number.');
        }

        // Read the options even when 'byteLength' is omitted; previously they were silently dropped in that case.
        options = toSessionOptions(arg3);
      } else {
        options = toSessionOptions(arg1);
      }
      filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
    } else {
      throw new TypeError('Unexpected argument[0]: must be \'path\' or \'buffer\'.');
    }

    // resolve backend, update session options with validated EPs, and create session handler
    const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
    const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, optionsWithValidatedEPs);
    TRACE_FUNC_END();
    return new InferenceSession(handler);
  }

  /** Forwards to the handler's `startProfiling()`. */
  startProfiling(): void {
    this.handler.startProfiling();
  }

  /** Forwards to the handler's `endProfiling()`. */
  endProfiling(): void {
    this.handler.endProfiling();
  }

  /** Input names as reported by the handler. */
  get inputNames(): readonly string[] {
    return this.handler.inputNames;
  }

  /** Output names as reported by the handler. */
  get outputNames(): readonly string[] {
    return this.handler.outputNames;
  }
}