2020-05-05 18:45:12 +00:00
|
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
|
// Licensed under the MIT License.
|
|
|
|
|
|
2023-06-12 19:05:11 +00:00
|
|
|
import {InferenceSession as InferenceSessionImpl} from './inference-session-impl.js';
|
2024-01-13 03:24:24 +00:00
|
|
|
import {OnnxModelOptions} from './onnx-model.js';
|
[js/api] introducing IO binding for tensor (#16452)
[//]: # (## Work In Progress. Feedbacks are welcome!)
### Description
This PR adds a few properties, methods and factories to the Tensor type to
support the IO-binding feature. This allows users to create tensors from
GPU/CPU-bound data without forcing a data transfer between CPU and
GPU.
This change is a way to resolve #15312
### Change Summary
1. Add properties to `Tensor` type:
a. `location`: indicating where the data is sitting. valid values are
`cpu`, `cpu-pinned`, `texture`, `gpu-buffer`.
b. `texture`: sit side to `data`, a readonly property of `WebGLTexture`
type. available only when `location === 'texture'`
c. `gpuBuffer`: sit side to `data`, a readonly property of `GPUBuffer`
type. available only when `location === 'gpu-buffer'`
2. Add methods to `Tensor` type (usually dealing with inference
outputs):
- async function `getData()` allows user to download data from GPU to
CPU manually.
- function `dispose()` allows user to release GPU resources manually.
3. Add factories for creating `Tensor` instances:
a. `fromTexture()` to create a WebGL texture bound tensor data
b. `fromGpuBuffer()` to create a WebGPUBuffer bound tensor data
c. `fromPinnedBuffer()` to create a tensor using a CPU pinned buffer
### Examples:
create tensors from texture and pass to inference session as inputs
```js
// when create session, specify we prefer 'image_output:0' to be stored on GPU as texture
const session = await InferenceSession.create('./my_model.onnx', {
executionProviders: [ 'webgl' ],
preferredOutputLocation: { 'image_output:0': 'texture' }
});
...
const myImageTexture = getTexture(); // user's function to get a texture
const myFeeds = { input0: Tensor.fromTexture(myImageTexture, { width: 224, height: 224 }) }; // shape [1, 224, 224, 4], RGBA format.
const results = await session.run(myFeeds);
const myOutputTexture = results['image_output:0'].texture;
```
2023-08-29 19:58:26 +00:00
|
|
|
import {OnnxValue, OnnxValueDataLocation} from './onnx-value.js';
|
2020-05-05 18:45:12 +00:00
|
|
|
|
2021-04-16 08:33:10 +00:00
|
|
|
/* eslint-disable @typescript-eslint/no-redeclare */
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
export declare namespace InferenceSession {
|
2022-05-04 06:41:36 +00:00
|
|
|
// #region input/output types
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
/**
* A read-only map from a name to an OnnxValue. FeedsType and ReturnType are aliases of this type.
*/
type OnnxValueMapType = {readonly [name: string]: OnnxValue};
|
|
|
|
|
/**
* A read-only map from a name to either an OnnxValue or null. This is the object form accepted by FetchesType.
*/
type NullableOnnxValueMapType = {readonly [name: string]: OnnxValue | null};
|
|
|
|
|
|
|
|
|
|
/**
|
2021-04-16 08:33:10 +00:00
|
|
|
* Feeds (model inputs): an object that uses input names as keys and OnnxValue instances as the corresponding values.
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
type FeedsType = OnnxValueMapType;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Fetches (model outputs) can be one of the following:
|
|
|
|
|
*
|
|
|
|
|
* - Omitted. The model's own output name definitions are used.
|
|
|
|
|
* - An array of strings indicating the output names.
|
|
|
|
|
* - An object that uses output names as keys and OnnxValue or null as the corresponding values.
|
|
|
|
|
*
|
2021-09-21 00:54:46 +00:00
|
|
|
* @remark
|
|
|
|
|
* Unlike the input argument, an OnnxValue is optional in the output. If an OnnxValue is present it will be
|
2020-05-05 18:45:12 +00:00
|
|
|
* used as a pre-allocated value by the inference engine; if omitted, the inference engine will allocate the buffer
|
|
|
|
|
* internally.
|
|
|
|
|
*/
|
|
|
|
|
type FetchesType = readonly string[]|NullableOnnxValueMapType;
|
|
|
|
|
|
2021-04-16 08:33:10 +00:00
|
|
|
/**
|
|
|
|
|
* An inferencing return type is an object that uses output names as keys and OnnxValue instances as the corresponding values.
|
|
|
|
|
*/
|
|
|
|
|
type ReturnType = OnnxValueMapType;
|
2020-05-05 18:45:12 +00:00
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #endregion
|
2020-05-05 18:45:12 +00:00
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #region session options
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A set of configurations for session behavior.
|
|
|
|
|
*/
|
2024-01-13 03:24:24 +00:00
|
|
|
export interface SessionOptions extends OnnxModelOptions {
|
2020-05-05 18:45:12 +00:00
|
|
|
/**
|
|
|
|
|
* An array of execution provider options.
|
|
|
|
|
*
|
|
|
|
|
* An execution provider option can be a string indicating the name of the execution provider,
|
|
|
|
|
* or an object of corresponding type.
|
|
|
|
|
*/
|
2021-04-16 08:33:10 +00:00
|
|
|
executionProviders?: readonly ExecutionProviderConfig[];
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The intra OP threads number.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
2021-05-07 19:12:37 +00:00
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native).
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
intraOpNumThreads?: number;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The inter OP threads number.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
2021-05-07 19:12:37 +00:00
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native).
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
interOpNumThreads?: number;
|
|
|
|
|
|
2023-09-13 16:17:34 +00:00
|
|
|
/**
|
|
|
|
|
* The free dimension override.
|
|
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
|
|
|
|
*/
|
|
|
|
|
freeDimensionOverrides?: {readonly [dimensionName: string]: number};
|
|
|
|
|
|
2020-05-05 18:45:12 +00:00
|
|
|
/**
|
|
|
|
|
* The optimization level.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
graphOptimizationLevel?: 'disabled'|'basic'|'extended'|'all';
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Whether to enable the CPU memory arena.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
enableCpuMemArena?: boolean;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Whether to enable memory pattern.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
enableMemPattern?: boolean;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Execution mode.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
executionMode?: 'sequential'|'parallel';
|
|
|
|
|
|
2023-02-24 23:50:15 +00:00
|
|
|
/**
|
|
|
|
|
* Optimized model file path.
|
|
|
|
|
*
|
|
|
|
|
* If this setting is specified, the optimized model will be dumped. In browser, a blob will be created
|
|
|
|
|
* with a pop-up window.
|
|
|
|
|
*/
|
|
|
|
|
optimizedModelFilePath?: string;
|
|
|
|
|
|
2021-05-17 21:57:19 +00:00
|
|
|
/**
|
[js/webgpu] Support capture and replay for jsep (#18989)
### Description
This PR expands the graph capture capability to JS EP, which is similar
to #16081. For the JS EP, however, we don't use CUDA Graph; instead, we
record all GPU commands and replay them, which removes most of the CPU
overhead and avoids the situation where the GPU waits for the CPU.
mobilenetv2-12 becomes 3.7ms from 6ms on NV 3090 and becomes 3.38ms from
4.58ms on Intel A770.
All limitations are similar with CUDA EP:
1. Models with control-flow ops (i.e. If, Loop and Scan ops) are not
supported.
2. Usage of graph capture is limited to models where-in all ops in the
model can be partitioned to the JS EP or CPU EP and no memory copy
between them.
3. Shapes of inputs/outputs cannot change across inference calls.
4. IObinding is required.
The usage is like below:
Method 1: specify outputs buffers explicitly.
```
const sessionOptions = {
executionProviders: [
{
name: "webgpu",
},
],
enableGraphCapture: true,
};
const session = await ort.InferenceSession.create('./models/mobilenetv2-12.onnx', sessionOptions);
// prepare the inputBuffer/outputBuffer
... ...
const feeds = {
'input': ort.Tensor.fromGpuBuffer(inputBuffer, { dataType: 'float32', dims })
};
const fetches = {
'output': ort.Tensor.fromGpuBuffer(outputBuffer, { dataType: 'float32', dims: [1, 1000] })
};
let results = await session.run(feeds, fetches); // The first run will begin to capture the graph.
// update inputBuffer content
... ...
results = await session.run(feeds, fetches); // The 2nd run and later runs will directly replay the captured graph.
... ...
session.release();
```
Method 2: Don't specify outputs buffers explicitly. Internally, when
graph capture is enabled, it will set all outputs location to
'gpu-buffer'.
```
const sessionOptions = {
executionProviders: [
{
name: "webgpu",
},
],
enableGraphCapture: true,
};
const session = await ort.InferenceSession.create('./models/mobilenetv2-12.onnx', sessionOptions);
// prepare the inputBuffer
... ...
const feeds = {
'input': ort.Tensor.fromGpuBuffer(inputBuffer, { dataType: 'float32', dims })
};
let results = await session.run(feeds); // The first run will begin to capture the graph.
// update inputBuffer content
... ...
results = await session.run(feeds); // The 2nd run and later runs will directly replay the captured graph.
... ...
session.release();
```
2024-01-31 02:28:03 +00:00
|
|
|
* Whether to enable profiling.
|
2021-05-17 21:57:19 +00:00
|
|
|
*
|
|
|
|
|
* This setting is a placeholder for future use.
|
|
|
|
|
*/
|
|
|
|
|
enableProfiling?: boolean;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* File prefix for profiling.
|
|
|
|
|
*
|
|
|
|
|
* This setting is a placeholder for future use.
|
|
|
|
|
*/
|
|
|
|
|
profileFilePrefix?: string;
|
|
|
|
|
|
2020-05-05 18:45:12 +00:00
|
|
|
/**
|
|
|
|
|
* Log ID.
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
logId?: string;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Log severity level. See
|
2022-08-22 17:48:12 +00:00
|
|
|
* https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h
|
2021-04-16 08:33:10 +00:00
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
|
2020-05-05 18:45:12 +00:00
|
|
|
*/
|
|
|
|
|
logSeverityLevel?: 0|1|2|3|4;
|
2021-05-17 21:57:19 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Log verbosity level.
|
|
|
|
|
*
|
|
|
|
|
* This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later
|
|
|
|
|
*/
|
|
|
|
|
logVerbosityLevel?: number;
|
|
|
|
|
|
[js/api] introducing IO binding for tensor (#16452)
[//]: # (## Work In Progress. Feedbacks are welcome!)
### Description
This PR adds a few properties, methods and factories to the Tensor type to
support the IO-binding feature. This allows users to create tensors from
GPU/CPU-bound data without forcing a data transfer between CPU and
GPU.
This change is a way to resolve #15312
### Change Summary
1. Add properties to `Tensor` type:
a. `location`: indicating where the data is sitting. valid values are
`cpu`, `cpu-pinned`, `texture`, `gpu-buffer`.
b. `texture`: sit side to `data`, a readonly property of `WebGLTexture`
type. available only when `location === 'texture'`
c. `gpuBuffer`: sit side to `data`, a readonly property of `GPUBuffer`
type. available only when `location === 'gpu-buffer'`
2. Add methods to `Tensor` type (usually dealing with inference
outputs):
- async function `getData()` allows user to download data from GPU to
CPU manually.
- function `dispose()` allows user to release GPU resources manually.
3. Add factories for creating `Tensor` instances:
a. `fromTexture()` to create a WebGL texture bound tensor data
b. `fromGpuBuffer()` to create a WebGPUBuffer bound tensor data
c. `fromPinnedBuffer()` to create a tensor using a CPU pinned buffer
### Examples:
create tensors from texture and pass to inference session as inputs
```js
// when create session, specify we prefer 'image_output:0' to be stored on GPU as texture
const session = await InferenceSession.create('./my_model.onnx', {
executionProviders: [ 'webgl' ],
preferredOutputLocation: { 'image_output:0': 'texture' }
});
...
const myImageTexture = getTexture(); // user's function to get a texture
const myFeeds = { input0: Tensor.fromTexture(myImageTexture, { width: 224, height: 224 }) }; // shape [1, 224, 224, 4], RGBA format.
const results = await session.run(myFeeds);
const myOutputTexture = results['image_output:0'].texture;
```
2023-08-29 19:58:26 +00:00
|
|
|
/**
|
|
|
|
|
* Specify a string as the preferred data location for all outputs, or an object that uses output names as keys and a
|
|
|
|
|
* preferred data location as corresponding values.
|
|
|
|
|
*
|
|
|
|
|
* This setting is available only in ONNXRuntime Web for WebGL and WebGPU EP.
|
|
|
|
|
*/
|
|
|
|
|
preferredOutputLocation?: OnnxValueDataLocation|{readonly [outputName: string]: OnnxValueDataLocation};
|
|
|
|
|
|
[js/webgpu] Support capture and replay for jsep (#18989)
### Description
This PR expands the graph capture capability to JS EP, which is similar
to #16081. For the JS EP, however, we don't use CUDA Graph; instead, we
record all GPU commands and replay them, which removes most of the CPU
overhead and avoids the situation where the GPU waits for the CPU.
mobilenetv2-12 becomes 3.7ms from 6ms on NV 3090 and becomes 3.38ms from
4.58ms on Intel A770.
All limitations are similar with CUDA EP:
1. Models with control-flow ops (i.e. If, Loop and Scan ops) are not
supported.
2. Usage of graph capture is limited to models where-in all ops in the
model can be partitioned to the JS EP or CPU EP and no memory copy
between them.
3. Shapes of inputs/outputs cannot change across inference calls.
4. IObinding is required.
The usage is like below:
Method 1: specify outputs buffers explicitly.
```
const sessionOptions = {
executionProviders: [
{
name: "webgpu",
},
],
enableGraphCapture: true,
};
const session = await ort.InferenceSession.create('./models/mobilenetv2-12.onnx', sessionOptions);
// prepare the inputBuffer/outputBuffer
... ...
const feeds = {
'input': ort.Tensor.fromGpuBuffer(inputBuffer, { dataType: 'float32', dims })
};
const fetches = {
'output': ort.Tensor.fromGpuBuffer(outputBuffer, { dataType: 'float32', dims: [1, 1000] })
};
let results = await session.run(feeds, fetches); // The first run will begin to capture the graph.
// update inputBuffer content
... ...
results = await session.run(feeds, fetches); // The 2nd run and later runs will directly replay the captured graph.
... ...
session.release();
```
Method 2: Don't specify outputs buffers explicitly. Internally, when
graph capture is enabled, it will set all outputs location to
'gpu-buffer'.
```
const sessionOptions = {
executionProviders: [
{
name: "webgpu",
},
],
enableGraphCapture: true,
};
const session = await ort.InferenceSession.create('./models/mobilenetv2-12.onnx', sessionOptions);
// prepare the inputBuffer
... ...
const feeds = {
'input': ort.Tensor.fromGpuBuffer(inputBuffer, { dataType: 'float32', dims })
};
let results = await session.run(feeds); // The first run will begin to capture the graph.
// update inputBuffer content
... ...
results = await session.run(feeds); // The 2nd run and later runs will directly replay the captured graph.
... ...
session.release();
```
2024-01-31 02:28:03 +00:00
|
|
|
/**
|
|
|
|
|
* Whether to enable graph capture.
|
|
|
|
|
* This setting is available only in ONNXRuntime Web for WebGPU EP.
|
|
|
|
|
*/
|
|
|
|
|
enableGraphCapture?: boolean;
|
|
|
|
|
|
2021-05-17 21:57:19 +00:00
|
|
|
/**
|
|
|
|
|
* Store configurations for a session. See
|
2022-08-22 17:48:12 +00:00
|
|
|
* https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/
|
2021-05-17 21:57:19 +00:00
|
|
|
* onnxruntime_session_options_config_keys.h
|
|
|
|
|
*
|
2021-09-21 00:54:46 +00:00
|
|
|
* This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later
|
2021-05-17 21:57:19 +00:00
|
|
|
*
|
2021-09-21 00:54:46 +00:00
|
|
|
* @example
|
2021-05-17 21:57:19 +00:00
|
|
|
* ```js
|
|
|
|
|
* extra: {
|
|
|
|
|
* session: {
|
|
|
|
|
* set_denormal_as_zero: "1",
|
|
|
|
|
* disable_prepacking: "1"
|
|
|
|
|
* },
|
|
|
|
|
* optimization: {
|
|
|
|
|
* enable_gelu_approximation: "1"
|
|
|
|
|
* }
|
|
|
|
|
* }
|
|
|
|
|
* ```
|
|
|
|
|
*/
|
|
|
|
|
extra?: Record<string, unknown>;
|
2020-05-05 18:45:12 +00:00
|
|
|
}
|
|
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #region execution providers
|
2021-04-16 08:33:10 +00:00
|
|
|
|
|
|
|
|
// Currently, we have the following backends to support execution providers:
|
[js/common] fix typedoc warnings (#19933)
### Description
Fix a few warnings in typedoc (for generating JS API):
```
[warning] The signature TrainingSession.loadParametersBuffer has an @param with name "buffer", which was not used.
[warning] NonTensorType, defined in ./lib/onnx-value.ts, is referenced by OnnxValue but not included in the documentation.
[warning] TensorFactory, defined in ./lib/tensor-factory.ts, is referenced by Tensor but not included in the documentation.
[warning] ExternalDataFileType, defined in ./lib/onnx-model.ts, is referenced by InferenceSession.SessionOptions.externalData but not included in the documentation.
[warning] TensorToDataUrlOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toDataURL.toDataURL.options but not included in the documentation.
[warning] TensorToImageDataOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toImageData.toImageData.options but not included in the documentation.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.adapter.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.device.
```
Changes highlighted:
- Merge `CoreMlExecutionProviderOption` and
`CoreMLExecutionProviderOption`. They expose two different sets of options
for React Native and the ORT Node.js binding. This should be fixed in the future.
- Fix a few naming inconsistencies between JSDoc and parameters
- Fix broken type links
- Exclude trace functions
2024-03-16 02:01:50 +00:00
|
|
|
// Backend Node.js binding: supports 'cpu', 'dml' (win32), 'coreml' (macOS) and 'cuda' (linux).
|
2024-01-14 07:04:02 +00:00
|
|
|
// Backend WebAssembly: supports 'cpu', 'wasm', 'webgpu' and 'webnn'.
|
2021-04-16 08:33:10 +00:00
|
|
|
// Backend ONNX.js: supports 'webgl'.
|
2023-06-16 09:38:41 +00:00
|
|
|
// Backend React Native: supports 'cpu', 'xnnpack', 'coreml' (iOS), 'nnapi' (Android).
|
2021-04-16 08:33:10 +00:00
|
|
|
interface ExecutionProviderOptionMap {
|
[js/common] fix typedoc warnings (#19933)
### Description
Fix a few warnings in typedoc (for generating JS API):
```
[warning] The signature TrainingSession.loadParametersBuffer has an @param with name "buffer", which was not used.
[warning] NonTensorType, defined in ./lib/onnx-value.ts, is referenced by OnnxValue but not included in the documentation.
[warning] TensorFactory, defined in ./lib/tensor-factory.ts, is referenced by Tensor but not included in the documentation.
[warning] ExternalDataFileType, defined in ./lib/onnx-model.ts, is referenced by InferenceSession.SessionOptions.externalData but not included in the documentation.
[warning] TensorToDataUrlOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toDataURL.toDataURL.options but not included in the documentation.
[warning] TensorToImageDataOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toImageData.toImageData.options but not included in the documentation.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.adapter.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.device.
```
Changes highlighted:
- Merge `CoreMlExecutionProviderOption` and
`CoreMLExecutionProviderOption`. They expose two different sets of options
for React Native and the ORT Node.js binding. This should be fixed in the future.
- Fix a few naming inconsistencies between JSDoc and parameters
- Fix broken type links
- Exclude trace functions
2024-03-16 02:01:50 +00:00
|
|
|
coreml: CoreMLExecutionProviderOption;
|
2021-04-16 08:33:10 +00:00
|
|
|
cpu: CpuExecutionProviderOption;
|
|
|
|
|
cuda: CudaExecutionProviderOption;
|
2023-08-25 23:57:06 +00:00
|
|
|
dml: DmlExecutionProviderOption;
|
[js/common] fix typedoc warnings (#19933)
### Description
Fix a few warnings in typedoc (for generating JS API):
```
[warning] The signature TrainingSession.loadParametersBuffer has an @param with name "buffer", which was not used.
[warning] NonTensorType, defined in ./lib/onnx-value.ts, is referenced by OnnxValue but not included in the documentation.
[warning] TensorFactory, defined in ./lib/tensor-factory.ts, is referenced by Tensor but not included in the documentation.
[warning] ExternalDataFileType, defined in ./lib/onnx-model.ts, is referenced by InferenceSession.SessionOptions.externalData but not included in the documentation.
[warning] TensorToDataUrlOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toDataURL.toDataURL.options but not included in the documentation.
[warning] TensorToImageDataOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toImageData.toImageData.options but not included in the documentation.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.adapter.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.device.
```
Changes highlighted:
- Merge `CoreMlExecutionProviderOption` and
`CoreMLExecutionProviderOption`. They expose two different sets of options
for React Native and the ORT Node.js binding. This should be fixed in the future.
- Fix a few naming inconsistencies between JSDoc and parameters
- Fix broken type links
- Exclude trace functions
2024-03-16 02:01:50 +00:00
|
|
|
nnapi: NnapiExecutionProviderOption;
|
2023-08-25 23:57:06 +00:00
|
|
|
tensorrt: TensorRtExecutionProviderOption;
|
2021-04-16 08:33:10 +00:00
|
|
|
wasm: WebAssemblyExecutionProviderOption;
|
|
|
|
|
webgl: WebGLExecutionProviderOption;
|
2023-10-03 04:25:12 +00:00
|
|
|
webgpu: WebGpuExecutionProviderOption;
|
2023-05-09 04:25:10 +00:00
|
|
|
webnn: WebNNExecutionProviderOption;
|
2024-05-09 20:11:07 +00:00
|
|
|
qnn: QnnExecutionProviderOption;
|
[js/common] fix typedoc warnings (#19933)
### Description
Fix a few warnings in typedoc (for generating JS API):
```
[warning] The signature TrainingSession.loadParametersBuffer has an @param with name "buffer", which was not used.
[warning] NonTensorType, defined in ./lib/onnx-value.ts, is referenced by OnnxValue but not included in the documentation.
[warning] TensorFactory, defined in ./lib/tensor-factory.ts, is referenced by Tensor but not included in the documentation.
[warning] ExternalDataFileType, defined in ./lib/onnx-model.ts, is referenced by InferenceSession.SessionOptions.externalData but not included in the documentation.
[warning] TensorToDataUrlOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toDataURL.toDataURL.options but not included in the documentation.
[warning] TensorToImageDataOptions, defined in ./lib/tensor-conversion.ts, is referenced by Tensor.toImageData.toImageData.options but not included in the documentation.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.adapter.
[warning] Failed to resolve link to "GpuBufferType" in comment for Env.WebGpuFlags.device.
```
Changes highlighted:
- Merge `CoreMlExecutionProviderOption` and
`CoreMLExecutionProviderOption`. They expose two different sets of options
for React Native and the ORT Node.js binding. This should be fixed in the future.
- Fix a few naming inconsistencies between JSDoc and parameters
- Fix broken type links
- Exclude trace functions
2024-03-16 02:01:50 +00:00
|
|
|
xnnpack: XnnpackExecutionProviderOption;
|
2020-05-05 18:45:12 +00:00
|
|
|
}
|
|
|
|
|
|
2021-04-16 08:33:10 +00:00
|
|
|
/**
* The union of the keys of ExecutionProviderOptionMap, i.e. the execution provider names that have a typed option
* interface.
*/
type ExecutionProviderName = keyof ExecutionProviderOptionMap;
|
|
|
|
|
/**
* One entry of `SessionOptions.executionProviders`: either an option object (a typed entry of
* ExecutionProviderOptionMap, or any ExecutionProviderOption) or a provider name.
*
* Because `string` is part of the union, names outside ExecutionProviderName are also accepted by the type.
*/
type ExecutionProviderConfig =
|
|
|
|
|
ExecutionProviderOptionMap[ExecutionProviderName]|ExecutionProviderOption|ExecutionProviderName|string;
|
|
|
|
|
|
|
|
|
|
/**
* Base shape of an execution provider option object. The provider-specific option interfaces extend this one and
* narrow `name` to a string literal.
*/
export interface ExecutionProviderOption {
|
|
|
|
|
// Name of the execution provider this option object is for.
readonly name: string;
|
|
|
|
|
}
|
|
|
|
|
/**
* Option object for the 'cpu' execution provider.
*/
export interface CpuExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'cpu';
|
|
|
|
|
// NOTE(review): presumably toggles use of a memory arena; the semantics are not visible here — confirm.
useArena?: boolean;
|
|
|
|
|
}
|
|
|
|
|
/**
* Option object for the 'cuda' execution provider.
*/
export interface CudaExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'cuda';
|
|
|
|
|
// NOTE(review): presumably selects which CUDA device to use; the semantics are not visible here — confirm.
deviceId?: number;
|
|
|
|
|
}
|
2023-08-25 23:57:06 +00:00
|
|
|
/**
* Option object for the 'dml' execution provider.
*/
export interface DmlExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'dml';
|
|
|
|
|
// NOTE(review): presumably selects which device to use; the semantics are not visible here — confirm.
deviceId?: number;
|
|
|
|
|
}
|
|
|
|
|
/**
* Option object for the 'tensorrt' execution provider.
*/
export interface TensorRtExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'tensorrt';
|
|
|
|
|
// NOTE(review): presumably selects which device to use; the semantics are not visible here — confirm.
deviceId?: number;
|
|
|
|
|
}
|
2021-04-16 08:33:10 +00:00
|
|
|
/**
* Option object for the 'wasm' execution provider. It currently declares no options besides `name`.
*/
export interface WebAssemblyExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'wasm';
|
|
|
|
|
}
|
|
|
|
|
/**
* Option object for the 'webgl' execution provider. It currently declares no options besides `name`.
*/
export interface WebGLExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'webgl';
|
|
|
|
|
// TODO: add flags
|
|
|
|
|
}
|
[js/web] add 'xnnpack' to EP list (#12723)
**Description**: This PR adds support for "XNNPACK EP" in ORTWeb and
changes the behavior of how ORTWeb deals with "backends", or "EPs" in
API.
**Background**: The term "backend" was introduced in ONNX.js to represent
a TypeScript type which implements a "backend" interface, which is a
similar but different concept from ORT's EP (execution provider). There
were 3 backends in ONNX.js: "cpu", "wasm" and "webgl".
When ORT Web is launched, the concept is derived to help users to
integrate smoothly. Technically, when "wasm" backend is used, users need
to also specify "EP" in the session options. Considering it may get
complicated and confused for users to figure out the difference between
"backend" and "EP", the JS API hide the "backend" concept and made a
mapping between names, backends and EPs:
"webgl" (Name) <==> "onnxjsBackend" (Backend)
"wasm" (Name) <==> "wasmBackend" (Backend) <==> "CPU" (EP)
**Details**:
The following changes are applied in this PR:
1. allow multi-registration for backends using the same name. This is
for use scenarios where both "onnxruntime-node" and "onnxruntime-web"
are consumed in a Node.js App ( so "cpu" will be registered twice in
this scenario. )
2. re-assign priority values to backends. I give 100 as base to "cpu"
for node and react_native, and 10 as base to "cpu" in web.
3. add "cpu", "xnnpack" as new names of backends.
4. update onnxruntime wasm exported functions to support EP
registration.
5. update implementations in ort web to handle execution providers in
session options.
6. add '--use_xnnpack' as default build flag for ort-web
2022-10-03 17:38:45 +00:00
|
|
|
/**
* Option object for the 'xnnpack' execution provider. It currently declares no options besides `name`.
*/
export interface XnnpackExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'xnnpack';
|
|
|
|
|
}
|
2023-10-03 04:25:12 +00:00
|
|
|
/**
* Option object for the 'webgpu' execution provider.
*/
export interface WebGpuExecutionProviderOption extends ExecutionProviderOption {
|
|
|
|
|
readonly name: 'webgpu';
|
|
|
|
|
// Either 'NCHW' or 'NHWC'.
// NOTE(review): presumably the preferred tensor data layout for this provider — confirm.
preferredLayout?: 'NCHW'|'NHWC';
|
|
|
|
|
}
|
[js/webnn] update API of session options for WebNN (#20816)
### Description
This PR is an API-only change to address the requirements being
discussed in #20729.
There are multiple ways that users may create an ORT session by
specifying the session options differently.
All the code snippets below use the variable `webnnOptions` as follows:
```js
const myWebnnSession = await ort.InferenceSession.create('./model.onnx', {
executionProviders: [
webnnOptions
]
});
```
### The old way (backward-compatibility)
```js
// all-default, name only
const webnnOptions_0 = 'webnn';
// all-default, properties omitted
const webnnOptions_1 = { name: 'webnn' };
// partial
const webnnOptions_2 = {
name: 'webnn',
deviceType: 'cpu'
};
// full
const webnnOptions_3 = {
name: 'webnn',
deviceType: 'gpu',
numThreads: 1,
powerPreference: 'high-performance'
};
```
### The new way (specify with MLContext)
```js
// options to create MLcontext
const options = {
deviceType: 'gpu',
powerPreference: 'high-performance'
};
const myMlContext = await navigator.ml.createContext(options);
// options for session options
const webnnOptions = {
name: 'webnn',
context: myMlContext,
...options
};
```
This should throw (because no deviceType is specified):
```js
const myMlContext = await navigator.ml.createContext({ ... });
const webnnOptions = {
name: 'webnn',
context: myMlContext
};
```
### Interop with WebGPU
```js
// get WebGPU device
const adapter = await navigator.gpu.requestAdapter({ ... });
const device = await adapter.requestDevice({ ... });
// set WebGPU adapter and device
ort.env.webgpu.adapter = adapter;
ort.env.webgpu.device = device;
const myMlContext = await navigator.ml.createContext(device);
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device
};
```
This should throw (because cannot specify both gpu device and MLContext
option at the same time):
```js
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device,
deviceType: 'gpu'
};
```
2024-05-31 10:25:14 +00:00
|
|
|
|
|
|
|
|
// #region WebNN options
|
|
|
|
|
|
|
|
|
|
interface WebNNExecutionProviderName extends ExecutionProviderOption {
|
2023-05-09 04:25:10 +00:00
|
|
|
readonly name: 'webnn';
|
[js/webnn] update API of session options for WebNN (#20816)
### Description
This PR is an API-only change to address the requirements being
discussed in #20729.
There are multiple ways that users may create an ORT session by
specifying the session options differently.
All the code snippets below use the variable `webnnOptions` as follows:
```js
const myWebnnSession = await ort.InferenceSession.create('./model.onnx', {
executionProviders: [
webnnOptions
]
});
```
### The old way (backward-compatibility)
```js
// all-default, name only
const webnnOptions_0 = 'webnn';
// all-default, properties omitted
const webnnOptions_1 = { name: 'webnn' };
// partial
const webnnOptions_2 = {
name: 'webnn',
deviceType: 'cpu'
};
// full
const webnnOptions_3 = {
name: 'webnn',
deviceType: 'gpu',
numThreads: 1,
powerPreference: 'high-performance'
};
```
### The new way (specify with MLContext)
```js
// options to create MLcontext
const options = {
deviceType: 'gpu',
powerPreference: 'high-performance'
};
const myMlContext = await navigator.ml.createContext(options);
// options for session options
const webnnOptions = {
name: 'webnn',
context: myMlContext,
...options
};
```
This should throw (because no deviceType is specified):
```js
const myMlContext = await navigator.ml.createContext({ ... });
const webnnOptions = {
name: 'webnn',
context: myMlContext
};
```
### Interop with WebGPU
```js
// get WebGPU device
const adapter = await navigator.gpu.requestAdapter({ ... });
const device = await adapter.requestDevice({ ... });
// set WebGPU adapter and device
ort.env.webgpu.adapter = adapter;
ort.env.webgpu.device = device;
const myMlContext = await navigator.ml.createContext(device);
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device
};
```
This should throw (because cannot specify both gpu device and MLContext
option at the same time):
```js
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device,
deviceType: 'gpu'
};
```
2024-05-31 10:25:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Represents a set of options for creating a WebNN MLContext.
 *
 * @see https://www.w3.org/TR/webnn/#dictdef-mlcontextoptions
 */
export interface WebNNContextOptions {
  /**
   * Optional. The kind of device to create the context for: `'cpu'`, `'gpu'` or `'npu'`.
   */
  deviceType?: 'cpu'|'gpu'|'npu';
  /**
   * Optional. Number of threads.
   */
  numThreads?: number;
  /**
   * Optional. Power preference: `'default'`, `'low-power'` or `'high-performance'`.
   */
  powerPreference?: 'default'|'low-power'|'high-performance';
}
|
[js/webnn] update API of session options for WebNN (#20816)
### Description
This PR is an API-only change to address the requirements being
discussed in #20729.
There are multiple ways that users may create an ORT session by
specifying the session options differently.
All the code snippet below will use the variable `webnnOptions` as this:
```js
const myWebnnSession = await ort.InferenceSession.create('./model.onnx', {
executionProviders: [
webnnOptions
]
});
```
### The old way (backward-compatibility)
```js
// all-default, name only
const webnnOptions_0 = 'webnn';
// all-default, properties omitted
const webnnOptions_1 = { name: 'webnn' };
// partial
const webnnOptions_2 = {
name: 'webnn',
deviceType: 'cpu'
};
// full
const webnnOptions_3 = {
name: 'webnn',
deviceType: 'gpu',
numThreads: 1,
powerPreference: 'high-performance'
};
```
### The new way (specify with MLContext)
```js
// options to create MLcontext
const options = {
deviceType: 'gpu',
powerPreference: 'high-performance'
};
const myMlContext = await navigator.ml.createContext(options);
// options for session options
const webnnOptions = {
name: 'webnn',
context: myMlContext,
...options
};
```
This should throw (because no deviceType is specified):
```js
const myMlContext = await navigator.ml.createContext({ ... });
const webnnOptions = {
name: 'webnn',
context: myMlContext
};
```
### Interop with WebGPU
```js
// get WebGPU device
const adaptor = await navigator.gpu.requestAdapter({ ... });
const device = await adaptor.requestDevice({ ... });
// set WebGPU adaptor and device
ort.env.webgpu.adaptor = adaptor;
ort.env.webgpu.device = device;
const myMlContext = await navigator.ml.createContext(device);
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device
};
```
This should throw (because cannot specify both gpu device and MLContext
option at the same time):
```js
const webnnOptions = {
name: 'webnn',
context: myMlContext,
gpuDevice: device,
deviceType: 'gpu'
};
```
2024-05-31 10:25:14 +00:00
|
|
|
|
|
|
|
|
/**
 * Represents a set of options for WebNN execution provider without MLContext.
 *
 * `context` is typed as `never` so that an object carrying an MLContext cannot match this shape.
 */
export interface WebNNOptionsWithoutMLContext extends WebNNExecutionProviderName, WebNNContextOptions {
  context?: never;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Represents a set of options for WebNN execution provider with MLContext.
 *
 * When MLContext is provided, the deviceType is also required so that the WebNN EP can determine the preferred
 * channel layout.
 *
 * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext
 */
export interface WebNNOptionsWithMLContext extends WebNNExecutionProviderName,
                                                   Omit<WebNNContextOptions, 'deviceType'>,
                                                   Required<Pick<WebNNContextOptions, 'deviceType'>> {
  /**
   * The MLContext to use. Typed as `unknown` here; the intended type is `MLContext`.
   */
  context: unknown /* MLContext */;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Represents a set of options for WebNN execution provider with MLContext which is created from GPUDevice.
 *
 * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext-gpudevice
 */
export interface WebNNOptionsWebGpu extends WebNNExecutionProviderName {
  /**
   * The MLContext to use. Typed as `unknown` here; the intended type is `MLContext`.
   */
  context: unknown /* MLContext */;
  /**
   * The GPU device that goes with `context`. Typed as `unknown` here; the intended type is `GPUDevice`.
   */
  gpuDevice: unknown /* GPUDevice */;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Options for WebNN execution provider.
 *
 * One of three shapes: without an MLContext, with an MLContext, or with an MLContext plus a GPU device.
 */
export type WebNNExecutionProviderOption = WebNNOptionsWithoutMLContext|WebNNOptionsWithMLContext|WebNNOptionsWebGpu;
|
|
|
|
|
|
|
|
|
|
// #endregion
|
|
|
|
|
|
2024-05-09 20:11:07 +00:00
|
|
|
/**
 * Options for the execution provider named `'qnn'`. Only the name is declared so far.
 */
export interface QnnExecutionProviderOption extends ExecutionProviderOption {
  readonly name: 'qnn';
  // TODO add flags
}
|
2023-06-16 09:38:41 +00:00
|
|
|
/**
 * Options for the execution provider named `'coreml'`.
 *
 * The members fall into two groups, each documented as available in one binding only:
 * `coreMlFlags` (Node.js binding) and the boolean settings (react-native).
 */
export interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
  readonly name: 'coreml';
  /**
   * The bit flags for CoreML execution provider.
   *
   * ```
   * COREML_FLAG_USE_CPU_ONLY = 0x001
   * COREML_FLAG_ENABLE_ON_SUBGRAPH = 0x002
   * COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004
   * COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008
   * COREML_FLAG_CREATE_MLPROGRAM = 0x010
   * ```
   *
   * See include/onnxruntime/core/providers/coreml/coreml_provider_factory.h for more details.
   *
   * This flag is available only in ONNXRuntime (Node.js binding).
   */
  coreMlFlags?: number;
  /**
   * Specify whether to use CPU only in CoreML EP.
   *
   * This setting is available only in ONNXRuntime (react-native).
   */
  useCPUOnly?: boolean;
  /**
   * Specify whether to enable CoreML EP on subgraph.
   *
   * This setting is available only in ONNXRuntime (react-native).
   */
  enableOnSubgraph?: boolean;
  /**
   * Specify whether to only enable CoreML EP for Apple devices with ANE (Apple Neural Engine).
   *
   * This setting is available only in ONNXRuntime (react-native).
   */
  onlyEnableDeviceWithANE?: boolean;
}
|
|
|
|
|
/**
 * Options for the execution provider named `'nnapi'`.
 *
 * All flags are optional booleans.
 * NOTE(review): the exact effect of each flag is not described in this file; confirm against the NNAPI execution
 * provider documentation before relying on the names alone.
 */
export interface NnapiExecutionProviderOption extends ExecutionProviderOption {
  readonly name: 'nnapi';
  useFP16?: boolean;
  useNCHW?: boolean;
  cpuDisabled?: boolean;
  cpuOnly?: boolean;
}
|
2022-05-04 06:41:36 +00:00
|
|
|
// #endregion
|
2021-04-16 08:33:10 +00:00
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #endregion
|
2020-05-05 18:45:12 +00:00
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #region run options
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
/**
 * A set of configurations for inference run behavior
 */
export interface RunOptions {
  /**
   * Log severity level. See
   * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/common/logging/severity.h
   *
   * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
   */
  logSeverityLevel?: 0|1|2|3|4;

  /**
   * Log verbosity level.
   *
   * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later
   */
  logVerbosityLevel?: number;

  /**
   * Terminate all incomplete OrtRun calls as soon as possible if true
   *
   * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later
   */
  terminate?: boolean;

  /**
   * A tag for the Run() calls using this
   *
   * This setting is available only in ONNXRuntime (Node.js binding and react-native) or WebAssembly backend
   */
  tag?: string;

  /**
   * Set a single run configuration entry. See
   * https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h
   *
   * This setting is available only in WebAssembly backend. Will support Node.js binding and react-native later
   *
   * @example
   *
   * ```js
   * extra: {
   *   memory: {
   *     enable_memory_arena_shrinkage: "1",
   *   }
   * }
   * ```
   */
  extra?: Record<string, unknown>;
}
|
|
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #endregion
|
2020-05-05 18:45:12 +00:00
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #region value metadata
|
2020-05-05 18:45:12 +00:00
|
|
|
|
|
|
|
|
/**
 * Placeholder for per-value metadata. No members are defined yet.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface ValueMetadata {
  // TBD
}
|
|
|
|
|
|
2022-05-04 06:41:36 +00:00
|
|
|
// #endregion
|
2020-05-05 18:45:12 +00:00
|
|
|
}
|
|
|
|
|
|
2021-04-16 08:33:10 +00:00
|
|
|
/**
 * Represent a runtime instance of an ONNX model.
 */
export interface InferenceSession {
  // #region run()

  /**
   * Execute the model asynchronously with the given feeds and options.
   *
   * @param feeds - Representation of the model input. See type description of `InferenceSession.FeedsType` for detail.
   * @param options - Optional. A set of options that controls the behavior of model inference.
   * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding values.
   */
  run(feeds: InferenceSession.FeedsType, options?: InferenceSession.RunOptions): Promise<InferenceSession.ReturnType>;

  /**
   * Execute the model asynchronously with the given feeds, fetches and options.
   *
   * @param feeds - Representation of the model input. See type description of `InferenceSession.FeedsType` for detail.
   * @param fetches - Representation of the model output. See type description of `InferenceSession.FetchesType` for
   * detail.
   * @param options - Optional. A set of options that controls the behavior of model inference.
   * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding values.
   */
  run(feeds: InferenceSession.FeedsType, fetches: InferenceSession.FetchesType,
      options?: InferenceSession.RunOptions): Promise<InferenceSession.ReturnType>;

  // #endregion

  // #region release()

  /**
   * Release the inference session and the underlying resources.
   *
   * @returns A promise that resolves once the release has completed.
   */
  release(): Promise<void>;

  // #endregion

  // #region profiling

  /**
   * Start profiling.
   */
  startProfiling(): void;

  /**
   * End profiling.
   */
  endProfiling(): void;

  // #endregion

  // #region metadata

  /**
   * Get input names of the loaded model.
   */
  readonly inputNames: readonly string[];

  /**
   * Get output names of the loaded model.
   */
  readonly outputNames: readonly string[];

  // /**
  //  * Get input metadata of the loaded model.
  //  */
  // readonly inputMetadata: ReadonlyArray<Readonly<InferenceSession.ValueMetadata>>;

  // /**
  //  * Get output metadata of the loaded model.
  //  */
  // readonly outputMetadata: ReadonlyArray<Readonly<InferenceSession.ValueMetadata>>;

  // #endregion
}
|
|
|
|
|
|
2020-05-05 18:45:12 +00:00
|
|
|
/**
 * Factory for `InferenceSession` objects. Each `create()` overload accepts a different representation of the model
 * (URI or file path, array buffer, segment of an array buffer, or Uint8Array).
 */
export interface InferenceSessionFactory {
  // #region create()

  /**
   * Create a new inference session and load model asynchronously from an ONNX model file.
   *
   * @param uri - The URI or file path of the model to load.
   * @param options - specify configuration for creating a new inference session.
   * @returns A promise that resolves to an InferenceSession object.
   */
  create(uri: string, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;

  /**
   * Create a new inference session and load model asynchronously from an array buffer.
   *
   * @param buffer - An ArrayBuffer representation of an ONNX model.
   * @param options - specify configuration for creating a new inference session.
   * @returns A promise that resolves to an InferenceSession object.
   */
  create(buffer: ArrayBufferLike, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;

  /**
   * Create a new inference session and load model asynchronously from segment of an array buffer.
   *
   * @param buffer - An ArrayBuffer representation of an ONNX model.
   * @param byteOffset - The beginning of the specified portion of the array buffer.
   * @param byteLength - The length in bytes of the array buffer.
   * @param options - specify configuration for creating a new inference session.
   * @returns A promise that resolves to an InferenceSession object.
   */
  create(buffer: ArrayBufferLike, byteOffset: number, byteLength?: number, options?: InferenceSession.SessionOptions):
      Promise<InferenceSession>;

  /**
   * Create a new inference session and load model asynchronously from a Uint8Array.
   *
   * @param buffer - A Uint8Array representation of an ONNX model.
   * @param options - specify configuration for creating a new inference session.
   * @returns A promise that resolves to an InferenceSession object.
   */
  create(buffer: Uint8Array, options?: InferenceSession.SessionOptions): Promise<InferenceSession>;

  // #endregion
}
|
|
|
|
|
|
2021-04-16 08:33:10 +00:00
|
|
|
/**
 * The `InferenceSessionFactory` value exported under the same name as the `InferenceSession` interface.
 */
// eslint-disable-next-line @typescript-eslint/naming-convention
export const InferenceSession: InferenceSessionFactory = InferenceSessionImpl;
|