onnxruntime/js/web/test/test-runner.ts
Yulong Wang e605870783
[js/web] Update API for ort.env.webgpu (#23026)
### Description

This PR is a replacement of #21671. It offers a new way for accessing
the following:
- `ort.env.webgpu.adapter`:
- **deprecating**. There is no point to get the value of it. Once
`GPUDevice.adapterInfo` is supported, there is no point to set the value
too.
- `ort.env.webgpu.device`:
  - set value of `GPUDevice` if user created it. Use at user's own risk.
- get value of `Promise<GPUDevice>`. If the device does not exist, a new one
is created; if it already exists, it is returned.
- `ort.env.webgpu.powerPreference`:
- **deprecating**. encouraging users to set `ort.env.webgpu.device` if
necessary.
- `ort.env.webgpu.forceFallbackAdapter`:
- **deprecating**. encouraging users to set `ort.env.webgpu.device` if
necessary.
2024-12-11 10:24:14 -08:00

1220 lines
44 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// WebNN API currently does not have a TypeScript definition file. This file is a workaround with types generated from
// WebNN API specification.
// https://github.com/webmachinelearning/webnn/issues/677
/// <reference path="../lib/wasm/jsep/webnn/webnn.d.ts" />
import { Float16Array as Float16ArrayPolyfill } from '@petamoriken/float16';
import { expect } from 'chai';
import * as ort from 'onnxruntime-common';
import { extname } from 'path';
import { inspect } from 'util';
import { Attribute } from '../lib/onnxjs/attribute';
import { InferenceHandler, resolveBackend, SessionHandler } from '../lib/onnxjs/backend';
import { createWebGLContext } from '../lib/onnxjs/backends/webgl/webgl-context-factory';
import { Logger, Profiler } from '../lib/onnxjs/instrument';
import { Operator } from '../lib/onnxjs/operators';
import { onnx } from '../lib/onnxjs/ort-schema/protobuf/onnx';
import { Tensor } from '../lib/onnxjs/tensor';
import { ProtoUtil } from '../lib/onnxjs/util';
import { createView } from '../lib/wasm/jsep/tensor-view';
import {
calculateTensorSizeInBytes,
isGpuBufferSupportedType,
isMLTensorSupportedType,
tensorDataTypeStringToEnum,
} from '../lib/wasm/wasm-common';
import { base64toBuffer, createMockGraph, readFile } from './test-shared';
import { Test } from './test-types';
// Thresholds used when comparing 2 float numbers. See TensorResultValidator.floatEqual() below.
// Each backend gets a pair: an absolute-error threshold and a relative-error threshold.
const CPU_THRESHOLD_ABSOLUTE_ERROR = 1.0e-4;
const CPU_THRESHOLD_RELATIVE_ERROR = 1.000001;
const WEBGL_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const WEBGL_THRESHOLD_RELATIVE_ERROR = 1.00001;
// WebGL half-float mode is much less precise, so its thresholds are considerably looser.
const WEBGL_HALF_FLOAT_THRESHOLD_ABSOLUTE_ERROR = 0.1;
const WEBGL_HALF_FLOAT_THRESHOLD_RELATIVE_ERROR = 1.02;
const WEBGPU_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const WEBGPU_THRESHOLD_RELATIVE_ERROR = 1.00001;
const WASM_THRESHOLD_ABSOLUTE_ERROR = 1.0e-4;
const WASM_THRESHOLD_RELATIVE_ERROR = 1.000001;
const ONNXRUNTIME_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const ONNXRUNTIME_THRESHOLD_RELATIVE_ERROR = 1.00001;
/**
 * returns a number to represent the current timestamp in a resolution as high as possible.
 * Prefers performance.now() when available; falls back to Date.now().
 */
const now = typeof performance !== 'undefined' && performance.now ? () => performance.now() : Date.now;
/**
 * Convert an internal onnxjs Tensor into a public ort.Tensor, preserving type, data and dims.
 */
function fromInternalTensor(tensor: Tensor): ort.Tensor {
  const { type, data, dims } = tensor;
  return new ort.Tensor(type, data as ort.Tensor.DataType, dims);
}
/**
 * Load a tensor from an onnx.TensorProto, given either a file path or preloaded protobuf bytes.
 *
 * @param uriOrData - path to a .pb file, or the raw protobuf bytes.
 * @param allowInt64 - when true, (u)int64 tensors are loaded as BigInt64Array/BigUint64Array;
 *   otherwise they go through the legacy internal Tensor conversion (backward compatibility).
 * @returns the tensor with its `name` property populated from the proto.
 */
async function loadTensorProto(uriOrData: string | Uint8Array, allowInt64 = false): Promise<Test.NamedTensor> {
  const buf = typeof uriOrData === 'string' ? await readFile(uriOrData) : uriOrData;
  const tensorProto = onnx.TensorProto.decode(buf);
  let tensor: ort.Tensor;
  // by default, we don't allow (u)int64. this is for backward compatibility.
  if (
    allowInt64 &&
    tensorProto &&
    tensorProto.dataType &&
    (tensorProto.dataType === onnx.TensorProto.DataType.INT64 ||
      tensorProto.dataType === onnx.TensorProto.DataType.UINT64)
  ) {
    const signed = tensorProto.dataType === onnx.TensorProto.DataType.INT64;
    const dataConstructor = signed ? BigInt64Array : BigUint64Array;
    const useRawData =
      tensorProto.rawData && typeof tensorProto.rawData.byteLength === 'number' && tensorProto.rawData.byteLength > 0;
    // BUGFIX: derive the element count from the field that actually holds the data.
    // Previously it was always computed from rawData.byteLength, which is 0 when the
    // values are stored in the int64Data/uint64Data fields, silently producing an
    // empty tensor in that case.
    const length = useRawData
      ? tensorProto.rawData.byteLength / 8
      : (signed ? tensorProto.int64Data : tensorProto.uint64Data)!.length;
    const data = new dataConstructor(length);
    if (useRawData) {
      // rawData stores little-endian 8-byte elements; read them via DataView.
      const dataSource = new DataView(
        tensorProto.rawData.buffer,
        tensorProto.rawData.byteOffset,
        tensorProto.rawData.byteLength,
      );
      for (let i = 0; i < length; i++) {
        data[i] = signed ? dataSource.getBigInt64(i * 8, true) : dataSource.getBigUint64(i * 8, true);
      }
    } else {
      for (let i = 0; i < length; i++) {
        data[i] = BigInt((signed ? tensorProto.int64Data : tensorProto.uint64Data)![i].toString());
      }
    }
    tensor = new ort.Tensor(signed ? 'int64' : 'uint64', data, ProtoUtil.tensorDimsFromProto(tensorProto.dims));
  } else {
    const internalTensor = Tensor.fromProto(tensorProto);
    tensor = fromInternalTensor(internalTensor);
  }
  // add property 'name' to the tensor object.
  const namedTensor = tensor as unknown as Test.NamedTensor;
  namedTensor.name = tensorProto.name;
  return namedTensor;
}
/**
 * Load a tensor from WebNN ML proto data. Not implemented yet.
 *
 * @param _uriOrData - file path or raw bytes of the tensor data (currently ignored).
 * @returns a promise that always rejects.
 */
async function loadMlProto(_uriOrData: string | Uint8Array): Promise<Test.NamedTensor> {
  // BUGFIX: reject with an Error object instead of a bare string, so callers get a
  // stack trace and `e instanceof Error` checks behave as expected.
  return Promise.reject(new Error('not supported'));
}
/**
 * Load input/output tensors for a model test case from its data files, and attach them
 * to the test case. Tensor names default to the model's input/output names when the model
 * has exactly one input/output and the loaded tensor is unnamed.
 */
async function loadTensors(
  modelMetaData: { inputNames: readonly string[]; outputNames: readonly string[] },
  testCase: Test.ModelTestCase,
  backendName: string,
  fileCache?: FileCacheBuffer,
) {
  const inputs: Test.NamedTensor[] = [];
  const outputs: Test.NamedTensor[] = [];
  let dataFileType: 'none' | 'pb' | 'npy' = 'none';
  // (u)int64 tensors are only loaded for backends that can represent them.
  const allowInt64 = ['wasm', 'webgpu', 'webnn'].includes(backendName);
  for (const dataFile of testCase.dataFiles) {
    const ext = extname(dataFile);
    const lowerExt = ext.toLowerCase();
    if (lowerExt === '.pb' || lowerExt === '.tpb') {
      if (dataFileType === 'none') {
        dataFileType = 'pb';
      } else if (dataFileType !== 'pb') {
        throw new Error(`cannot load data from test case "${testCase.name}", multiple types of files detected`);
      }
      const uriOrData = fileCache?.[dataFile] ?? dataFile;
      const tensor =
        lowerExt === '.pb'
          ? await loadTensorProto(uriOrData, allowInt64) // onnx.TensorProto
          : await loadMlProto(uriOrData);
      // classify by file name: files containing 'input'/'output' feed the respective list.
      const basename = dataFile.split(/[/\\]/).pop()!;
      if (basename.indexOf('input') !== -1) {
        inputs.push(tensor);
      } else if (basename.indexOf('output') !== -1) {
        outputs.push(tensor);
      }
    } else {
      throw new Error(`${ext} file is not supported now`);
    }
  }
  // if model has single input/output, and tensor name is empty, we assign model's input/output names to it.
  const { inputNames, outputNames } = modelMetaData;
  if (inputNames.length === 1 && inputs.length === 1 && !inputs[0].name) {
    inputs[0].name = inputNames[0];
  }
  if (outputNames.length === 1 && outputs.length === 1 && !outputs[0].name) {
    outputs[0].name = outputNames[0];
  }
  testCase.inputs = inputs;
  testCase.outputs = outputs;
}
/**
 * Create an ort.InferenceSession for a model test.
 *
 * @param modelFilePath - path of the model file; used as the cache key when a file cache is given.
 * @param backendHint - execution provider configuration to use.
 * @param ioBindingMode - IO binding mode; 'gpu-location'/'ml-location' select a preferred output location.
 * @param profile - when true, enables profiling and starts it right after session creation.
 * @param externalData - external data descriptors forwarded to the session options.
 * @param sessionOptions - base session options; backend/profiling settings are layered on top.
 * @param fileCache - optional preloaded file contents to avoid reading from disk.
 */
async function initializeSession(
  modelFilePath: string,
  backendHint: ort.InferenceSession.ExecutionProviderConfig,
  ioBindingMode: Test.IOBindingMode,
  profile: boolean,
  externalData: ort.InferenceSession.SessionOptions['externalData'],
  sessionOptions: ort.InferenceSession.SessionOptions,
  fileCache?: FileCacheBuffer,
): Promise<ort.InferenceSession> {
  const preloadModelData: Uint8Array | undefined = fileCache?.[modelFilePath];
  const preloadSuffix = preloadModelData ? ` [preloaded(${preloadModelData.byteLength})]` : '';
  Logger.verbose('TestRunner', `Start to load model from file: ${modelFilePath}${preloadSuffix}`);

  // map the IO binding mode to the session's preferred output location.
  const preferredOutputLocation: ort.Tensor.DataLocation | undefined =
    ioBindingMode === 'gpu-location' ? 'gpu-buffer' : ioBindingMode === 'ml-location' ? 'ml-tensor' : undefined;

  const sessionConfig = {
    ...sessionOptions,
    executionProviders: [backendHint],
    profiler: profile ? { maxNumberEvents: 65536 } : undefined,
    enableProfiling: profile,
    preferredOutputLocation,
    externalData,
  };

  let session: ort.InferenceSession;
  try {
    const modelData = preloadModelData ?? (await readFile(modelFilePath));
    session = await ort.InferenceSession.create(modelData, sessionConfig);
  } catch (e) {
    Logger.error(
      'TestRunner',
      `Failed to load model from file: ${modelFilePath}. Error: ${e.message} @ ${e.fileName}:${e.lineNumber}`,
    );
    throw e;
  }
  if (profile) {
    session.startProfiling();
  }
  Logger.verbose('TestRunner', `Finished loading model from file: ${modelFilePath}`);
  return session;
}
/**
 * In-memory cache of preloaded file contents (model / tensor protobuf data), keyed by file path.
 */
type FileCacheBuffer = {
  [filePath: string]: Uint8Array;
};
/**
 * a ModelTestContext object contains all states in a ModelTest
 */
export class ModelTestContext {
  private constructor(
    readonly session: ort.InferenceSession,
    readonly backend: string,
    readonly perfData: ModelTestContext.ModelTestPerfData,
    readonly ioBinding: Test.IOBindingMode,
    private readonly profile: boolean,
    public readonly mlContext?: MLContext,
  ) {}
  /**
   * dump the current performance data
   */
  private logPerfData() {
    const data = this.perfData;
    Logger.verbose('TestRunner.Perf', '***Perf Data Start');
    Logger.verbose('TestRunner.Perf', ` * Init            : ${data.init}`);
    Logger.verbose('TestRunner.Perf', ` * Running times   : ${data.count}`);
    Logger.verbose('TestRunner.Perf', ` * FirstRun        : ${data.firstRun.toFixed(2)}`);
    const runs = data.runs;
    if (runs.length > 0) {
      Logger.verbose('TestRunner.Perf', ` * Runs            : ${runs.map((r) => r.toFixed(2)).join(', ')}`);
      if (runs.length > 1) {
        // BUGFIX: copy before sorting so perfData.runs keeps its chronological order.
        const sorted = [...runs].sort((a, b) => a - b);
        Logger.verbose('TestRunner.Perf', ` * Runs P50        : ${sorted[Math.floor((runs.length - 1) / 2)].toFixed(2)}`);
        const avg = runs.reduce((prev, current) => prev + current) / runs.length;
        Logger.verbose('TestRunner.Perf', ` * Runs Avg        : ${avg.toFixed(2)}`);
        // BUGFIX: pass 0 as the reduce initial value. Without it, the first element was
        // used as-is (not its squared deviation), producing a wrong sample variance/SD.
        const variance = runs.reduce((prev, current) => prev + (current - avg) * (current - avg), 0);
        const sd = Math.sqrt(variance / (runs.length - 1));
        Logger.verbose('TestRunner.Perf', ` * Runs SD         : ${sd.toFixed(2)}`);
      }
    }
    Logger.verbose('TestRunner.Perf', '***Perf Data End');
  }
  /**
   * End profiling (if enabled), dump performance data, and release the session.
   */
  async release(): Promise<void> {
    if (this.profile) {
      this.session.endProfiling();
    }
    this.logPerfData();
    await this.session.release();
  }
  /**
   * create a ModelTestContext object that used in every test cases in the given ModelTest.
   */
  static async create(
    modelTest: Test.ModelTest,
    profile: boolean,
    testOptions?: Test.Options,
  ): Promise<ModelTestContext> {
    if (this.initializing) {
      throw new Error('cannot create a ModelTestContext object when the previous creation is not done');
    }
    try {
      this.initializing = true;
      const initStart = now();
      const executionProviderConfig =
        modelTest.backend === 'webnn' ? testOptions?.webnnOptions || { name: 'webnn' } : modelTest.backend!;
      let mlContext: MLContext | undefined;
      // for WebNN IO binding modes, create an MLContext up-front and hand it to the EP options.
      if (['ml-tensor', 'ml-location'].includes(modelTest.ioBinding)) {
        const webnnOptions = executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption;
        const deviceType = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.deviceType;
        const powerPreference = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.powerPreference;
        mlContext = await navigator.ml.createContext({ deviceType, powerPreference });
        (executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption).context = mlContext;
        if (!deviceType) {
          // NOTE(review): `deviceType` is falsy here, so this assignment is a no-op. It looks
          // like it was meant to propagate a default/actual device type back into the
          // options — confirm the intended behavior upstream.
          (executionProviderConfig as ort.InferenceSession.WebNNContextOptions).deviceType = deviceType;
        }
      }
      const session = await initializeSession(
        modelTest.modelUrl,
        executionProviderConfig,
        modelTest.ioBinding,
        profile,
        modelTest.externalData,
        testOptions?.sessionOptions || {},
        this.cache,
      );
      const initEnd = now();
      for (const testCase of modelTest.cases) {
        await loadTensors(session, testCase, modelTest.backend!, this.cache);
      }
      return new ModelTestContext(
        session,
        modelTest.backend!,
        { init: initEnd - initStart, firstRun: -1, runs: [], count: 0 },
        modelTest.ioBinding,
        profile,
        mlContext,
      );
    } finally {
      this.initializing = false;
    }
  }
  /**
   * set the global file cache for looking up model and tensor protobuf files.
   */
  static setCache(cache: Test.FileCache): void {
    const keys = Object.keys(cache);
    Logger.info('TestRunner', `Setting up file cache... Entry count: ${keys.length}.`);
    for (const key of keys) {
      this.cache[key] = base64toBuffer(cache[key]);
    }
  }
  // guards against concurrent create() calls (session creation is not reentrant here).
  private static initializing = false;
  // global file cache shared by all ModelTestContext instances.
  private static cache: FileCacheBuffer = {};
}
export declare namespace ModelTestContext {
  /** Performance measurements collected while running a ModelTest. */
  export interface ModelTestPerfData {
    /** session initialization duration (same unit as `now()`, i.e. milliseconds). */
    init: number;
    /** duration of the first (warm-up) run; -1 until the first run completes. */
    firstRun: number;
    /** durations of runs after the first one. */
    runs: number[];
    /** total number of runs performed, including the first. */
    count: number;
  }
}
/**
 * Compares actual vs expected tensors using backend-specific absolute and relative
 * error thresholds. See the threshold constants at the top of this file.
 */
export class TensorResultValidator {
  private readonly absoluteThreshold: number;
  private readonly relativeThreshold: number;
  // expected values are clamped to [-maxFloatValue, maxFloatValue] before the relative
  // comparison; defaults to the largest finite float32, lowered to 65504 for half-float WebGL.
  private readonly maxFloatValue: number = 3.4028234663852886e38;
  // lazily-detected flag: whether the WebGL backend runs without float32 render support.
  private static isHalfFloat: boolean | undefined;
  constructor(backend: string) {
    if (backend === 'cpu') {
      this.absoluteThreshold = CPU_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = CPU_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'webgl') {
      if (TensorResultValidator.isHalfFloat === undefined) {
        TensorResultValidator.isHalfFloat = !createWebGLContext(ort.env.webgl.contextId).isRenderFloat32Supported;
      }
      if (TensorResultValidator.isHalfFloat) {
        this.maxFloatValue = 65504;
        this.absoluteThreshold = WEBGL_HALF_FLOAT_THRESHOLD_ABSOLUTE_ERROR;
        this.relativeThreshold = WEBGL_HALF_FLOAT_THRESHOLD_RELATIVE_ERROR;
      } else {
        this.absoluteThreshold = WEBGL_THRESHOLD_ABSOLUTE_ERROR;
        this.relativeThreshold = WEBGL_THRESHOLD_RELATIVE_ERROR;
      }
    } else if (backend === 'webgpu') {
      this.absoluteThreshold = WEBGPU_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = WEBGPU_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'wasm' || backend === 'webnn') {
      this.absoluteThreshold = WASM_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = WASM_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'onnxruntime') {
      this.absoluteThreshold = ONNXRUNTIME_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = ONNXRUNTIME_THRESHOLD_RELATIVE_ERROR;
    } else {
      throw new Error(`backend not supported: ${backend}`);
    }
  }
  /** Compare internal onnxjs tensors by converting them to ort.Tensor first. */
  checkTensorResult(actual: Tensor[], expected: Tensor[]): void {
    this.checkApiTensorResult(actual.map(fromInternalTensor), expected.map(fromInternalTensor));
  }
  /** Compare two lists of ort.Tensor positionally; fails the chai expectation on mismatch. */
  checkApiTensorResult(actual: ort.Tensor[], expected: ort.Tensor[]): void {
    // check output size
    expect(actual.length, 'size of output tensors').to.equal(expected.length);
    // compare output one-by-one
    for (let i = 0; i < actual.length; ++i) {
      const match = this.areEqual(actual[i], expected[i]);
      if (!match) {
        Logger.error(
          'TestRunner',
          `Tensor mismatch: \nACTUAL: type=${actual[i].type}; dims=[${actual[i].dims}]; data=[${actual[i].data}]\nEXPECT: type=${expected[i].type}; dims=[${expected[i].dims}]; data=[${expected[i].data}]`,
        );
      }
      expect(match, 'tensor data should match').to.be.true;
    }
  }
  /** Compare a name-keyed output map against a list of named expected tensors. */
  checkNamedTensorResult(actual: Record<string, ort.Tensor>, expected: Test.NamedTensor[]): void {
    // check output size
    expect(Object.getOwnPropertyNames(actual).length, 'size of output tensors').to.equal(expected.length);
    // check output mapping
    for (const expectedOneOutput of expected) {
      expect(actual, 'keys of output tensors').to.contain.keys(expectedOneOutput.name);
    }
    this.checkApiTensorResult(
      expected.map((i) => actual[i.name]!),
      expected,
    );
  }
  // This function check whether 2 tensors should be considered as 'match' or not
  areEqual(actual: ort.Tensor, expected: ort.Tensor): boolean {
    if (!actual || !expected) {
      return false;
    }
    if (!actual.dims || !expected.dims) {
      return false;
    }
    const actualDims = actual.dims;
    const actualType = actual.type;
    const expectedDims = expected.dims;
    const expectedType = expected.type;
    if (actualType !== expectedType) {
      return false;
    }
    if (actualDims.length !== expectedDims.length) {
      return false;
    }
    for (let i = 0; i < actualDims.length; i++) {
      if (actualDims[i] !== expectedDims[i]) {
        return false;
      }
    }
    switch (actualType) {
      case 'string':
        return this.strictEqual(actual.data, expected.data);
      case 'float16': {
        // widen both sides from fp16 (stored as Uint16Array) to fp32 before comparing.
        const actualData = actual.data as Uint16Array;
        const actualDataBuffer = actualData.buffer;
        const actualDataByteOffset = actualData.byteOffset;
        const actualDataLength = actualData.length;
        const actualDataFloat32Array = new Float32Array(
          new Float16ArrayPolyfill(actualDataBuffer, actualDataByteOffset, actualDataLength),
        );
        const expectedData = expected.data as Uint16Array;
        const expectedDataBuffer = expectedData.buffer;
        const expectedDataByteOffset = expectedData.byteOffset;
        const expectedDataLength = expectedData.length;
        const expectedDataFloat32Array = new Float32Array(
          new Float16ArrayPolyfill(expectedDataBuffer, expectedDataByteOffset, expectedDataLength),
        );
        return this.floatEqual(actualDataFloat32Array, expectedDataFloat32Array);
      }
      case 'float32':
      case 'float64':
        return this.floatEqual(
          actual.data as number[] | Float32Array | Float64Array,
          expected.data as number[] | Float32Array | Float64Array,
        );
      case 'uint8':
      case 'int8':
      case 'uint16':
      case 'int16':
      case 'int32':
      case 'uint32':
      case 'int64':
      case 'bool':
      case 'int4':
      case 'uint4':
        return TensorResultValidator.integerEqual(
          actual.data as number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array | Int32Array,
          expected.data as number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array | Int32Array,
        );
      default:
        throw new Error('type not implemented or not supported');
    }
  }
  /** Deep-equality check via chai; returns a boolean instead of throwing. */
  strictEqual<T>(actual: T, expected: T): boolean {
    try {
      expect(actual).to.deep.equal(expected);
      return true;
    } catch {
      return false;
    }
  }
  /** Element-wise float comparison using this validator's absolute/relative thresholds. */
  floatEqual(
    actual: number[] | Float32Array | Float64Array,
    expected: number[] | Float32Array | Float64Array,
  ): boolean {
    if (actual.length !== expected.length) {
      return false;
    }
    for (let i = actual.length - 1; i >= 0; i--) {
      const a = actual[i];
      let b = expected[i];
      if (a === b) {
        continue; // exact the same value, treat as equal
      }
      // check for NaN
      //
      if (Number.isNaN(a) && Number.isNaN(b)) {
        continue; // 2 numbers are NaN, treat as equal
      }
      if (Number.isNaN(a) || Number.isNaN(b)) {
        Logger.error('Validator', `a or b isNan -- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
        return false; // one is NaN and the other is not
      }
      // check for Infinity
      //
      if (!Number.isFinite(a) || !Number.isFinite(b)) {
        Logger.error('Validator', `a or b is Infinity -- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
        return false; // at least one is Infinity and the other is not or their sign is different
      }
      // normalize value of b
      b = Math.max(Math.min(expected[i], this.maxFloatValue), -this.maxFloatValue);
      // Comparing 2 float numbers: (Suppose a >= b)
      //
      // if ( a - b < ABSOLUTE_ERROR || 1.0 < a / b < RELATIVE_ERROR)
      //   test pass
      // else
      //   test fail
      // endif
      //
      if (Math.abs(actual[i] - expected[i]) < this.absoluteThreshold) {
        continue; // absolute error check pass
      }
      // BUGFIX: require a and b to have the same sign. When the signs differ, both
      // ratios a/b and b/a are negative and thus always below the threshold, so pairs
      // like (1000, -1000) were previously (wrongly) accepted by the relative check.
      if (
        a !== 0 &&
        b !== 0 &&
        Math.sign(a) === Math.sign(b) &&
        a / b < this.relativeThreshold &&
        b / a < this.relativeThreshold
      ) {
        continue; // relative error check pass
      }
      // if code goes here, it means both (abs/rel) check failed.
      Logger.error('Validator', `abs/rel check failed-- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
      return false;
    }
    return true;
  }
  /** Element-wise exact comparison for integer/bool tensor data. */
  static integerEqual(
    actual: number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array | Int32Array,
    expected: number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array | Int32Array,
  ): boolean {
    if (actual.length !== expected.length) {
      return false;
    }
    for (let i = actual.length - 1; i >= 0; i--) {
      if (actual[i] !== expected[i]) {
        return false;
      }
    }
    return true;
  }
}
/**
 * Upload a CPU tensor into a new WebGPU buffer and wrap it as a 'gpu-buffer' located ort.Tensor.
 * The returned tensor owns the buffer and destroys it on dispose().
 */
async function createGpuTensorForInput(cpuTensor: ort.Tensor): Promise<ort.Tensor> {
  if (!isGpuBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
    throw new Error(`createGpuTensorForInput can not work with ${cpuTensor.type} tensor`);
  }
  const device = await ort.env.webgpu.device;
  // round the buffer size up to a multiple of 16 bytes.
  const bufferSize = Math.ceil(cpuTensor.data.byteLength / 16) * 16;
  const gpuBuffer = device.createBuffer({
    // eslint-disable-next-line no-bitwise
    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
    size: bufferSize,
    mappedAtCreation: true,
  });
  // copy the CPU bytes into the mapped buffer, then unmap to make it usable on the GPU.
  const source = new Uint8Array(cpuTensor.data.buffer, cpuTensor.data.byteOffset, cpuTensor.data.byteLength);
  new Uint8Array(gpuBuffer.getMappedRange()).set(source);
  gpuBuffer.unmap();
  // TODO: how to "await" for the copy to finish, so that we can get more accurate performance data?
  return ort.Tensor.fromGpuBuffer(gpuBuffer, {
    dataType: cpuTensor.type,
    dims: cpuTensor.dims,
    dispose: () => gpuBuffer.destroy(),
  });
}
/**
 * Allocate a WebGPU buffer for an output tensor and wrap it as a 'gpu-buffer' located
 * ort.Tensor. The download callback copies the result through a staging buffer and
 * returns a typed-array view over it.
 */
async function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
  if (!isGpuBufferSupportedType(type)) {
    throw new Error(`createGpuTensorForOutput can not work with ${type} tensor`);
  }
  const byteSize = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(type), dims)!;
  const device = await ort.env.webgpu.device;
  const gpuBuffer = device.createBuffer({
    // eslint-disable-next-line no-bitwise
    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
    // round up to a multiple of 16 bytes.
    size: Math.ceil(byteSize / 16) * 16,
  });
  // read back the buffer contents via a MAP_READ staging buffer.
  const downloadFromGpu = async () => {
    const stagingBuffer = device.createBuffer({
      // eslint-disable-next-line no-bitwise
      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
      size: gpuBuffer.size,
    });
    const encoder = device.createCommandEncoder();
    encoder.copyBufferToBuffer(gpuBuffer, 0, stagingBuffer, 0, gpuBuffer.size);
    device.queue.submit([encoder.finish()]);
    await stagingBuffer.mapAsync(GPUMapMode.READ);
    // slice off the 16-byte-alignment padding before creating the view.
    const arrayBuffer = stagingBuffer.getMappedRange().slice(0, byteSize);
    stagingBuffer.destroy();
    return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.GpuBufferDataTypes];
  };
  return ort.Tensor.fromGpuBuffer(gpuBuffer, {
    dataType: type,
    dims,
    dispose: () => gpuBuffer.destroy(),
    download: downloadFromGpu,
  });
}
/**
 * Allocate a readable WebNN MLTensor for an output and wrap it as an 'ml-tensor' located
 * ort.Tensor. The download callback reads the tensor contents back through the MLContext.
 */
async function createMLTensorForOutput(mlContext: MLContext, type: ort.Tensor.Type, dims: readonly number[]) {
  if (!isMLTensorSupportedType(type)) {
    throw new Error(`createMLTensorForOutput can not work with ${type} tensor`);
  }
  // WebNN has no 'bool' data type; booleans are stored as uint8.
  const dataType = type === 'bool' ? 'uint8' : type;
  const shape = dims as number[];
  const mlTensor = await mlContext.createTensor({
    dataType,
    shape,
    // Assign both shape and dimensions while transitioning to new API.
    dimensions: shape,
    usage: typeof MLTensorUsage == 'undefined' ? undefined : MLTensorUsage.READ,
    readable: true,
  });
  return ort.Tensor.fromMLTensor(mlTensor, {
    dataType: type,
    dims,
    dispose: () => mlTensor.destroy(),
    download: async () => {
      const buffer = await mlContext.readTensor(mlTensor);
      return createView(buffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.MLTensorDataTypes];
    },
  });
}
/**
 * Upload a CPU tensor into a writable WebNN MLTensor and wrap it as an 'ml-tensor' located
 * ort.Tensor. The returned tensor owns the MLTensor and destroys it on dispose().
 */
async function createMLTensorForInput(mlContext: MLContext, cpuTensor: ort.Tensor): Promise<ort.Tensor> {
  if (!isMLTensorSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
    throw new Error(`createMLTensorForInput can not work with ${cpuTensor.type} tensor`);
  }
  // WebNN has no 'bool' data type; booleans are stored as uint8.
  const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
  const shape = cpuTensor.dims as number[];
  const mlTensor = await mlContext.createTensor({
    dataType,
    shape,
    // Assign both shape and dimensions while transitioning to new API.
    dimensions: shape,
    usage: typeof MLTensorUsage == 'undefined' ? undefined : MLTensorUsage.WRITE,
    writable: true,
  });
  mlContext.writeTensor(mlTensor, cpuTensor.data);
  return ort.Tensor.fromMLTensor(mlTensor, {
    dataType: cpuTensor.type,
    dims: cpuTensor.dims,
    dispose: () => mlTensor.destroy(),
  });
}
/**
 * Run one inference on the given session, optionally routing inputs/outputs through WebGPU
 * buffers or WebNN MLTensors according to the IO binding mode.
 *
 * Note: entries in `options.feeds` are replaced in-place with device-located tensors when
 * uploading, and every feed tensor is disposed in the finally block (including caller-provided
 * CPU tensors).
 *
 * @returns a tuple of [start timestamp, end timestamp, output tensor map].
 */
export async function sessionRun(options: {
  session: ort.InferenceSession;
  feeds: Record<string, ort.Tensor>;
  outputsMetaInfo: Record<string, Pick<ort.Tensor, 'dims' | 'type'>>;
  ioBinding: Test.IOBindingMode;
  mlContext?: MLContext;
}): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> {
  const session = options.session;
  const feeds = options.feeds;
  const fetches: Record<string, ort.Tensor> = {};
  // currently we only support IO Binding for WebGPU and WebNN
  //
  // For inputs, we create tensors on 'gpu-tensor', 'gpu-location', 'ml-tensor', and 'ml-location' binding testing
  // modes.
  // For outputs, we create tensors on 'gpu-tensor' and 'ml-tensor' binding testing modes.
  // in 'gpu-device' binding mode, outputs are not pre-allocated.
  const shouldUploadInput = ['gpu-tensor', 'gpu-location', 'ml-location', 'ml-tensor'].includes(options.ioBinding);
  const shouldUploadOutput = options.ioBinding === 'gpu-tensor' || options.ioBinding === 'ml-tensor';
  try {
    if (shouldUploadInput) {
      // replace the CPU tensors in feeds into GPU tensors
      for (const name in feeds) {
        if (Object.hasOwnProperty.call(feeds, name)) {
          // empty tensors (size 0) stay on CPU; only non-empty tensors are uploaded.
          if (feeds[name].size > 0) {
            if (options.ioBinding === 'ml-location' || options.ioBinding === 'ml-tensor') {
              feeds[name] = await createMLTensorForInput(options.mlContext!, feeds[name]);
            } else {
              feeds[name] = await createGpuTensorForInput(feeds[name]);
            }
          }
        }
      }
    }
    if (shouldUploadOutput) {
      // pre-allocate device-located output tensors so the session writes into them directly.
      for (const name in options.outputsMetaInfo) {
        if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
          const { type, dims } = options.outputsMetaInfo[name];
          if (dims.some((d) => d === 0)) {
            // empty outputs cannot be allocated on device; use a plain CPU tensor.
            fetches[name] = new ort.Tensor(type, [], dims);
          } else {
            if (options.ioBinding === 'ml-tensor') {
              fetches[name] = await createMLTensorForOutput(options.mlContext!, type, dims);
            } else {
              fetches[name] = await createGpuTensorForOutput(type, dims);
            }
          }
        }
      }
    }
    const start = now();
    Logger.verbose('TestRunner', `Timestamp before session run: ${start}`);
    // when outputs are pre-allocated, pass the fetches map; otherwise just the output names.
    const outputs = await (shouldUploadOutput
      ? session.run(feeds, fetches)
      : session.run(feeds, Object.getOwnPropertyNames(options.outputsMetaInfo)));
    const end = now();
    Logger.verbose('TestRunner', `Timestamp after session run: ${end}`);
    // download each output tensor if needed
    for (const name in outputs) {
      if (Object.hasOwnProperty.call(outputs, name)) {
        const tensor = outputs[name];
        // Tensor.getData(true) release the underlying resource
        await tensor.getData(true);
      }
    }
    return [start, end, outputs];
  } finally {
    // dispose the GPU tensors in feeds
    for (const name in feeds) {
      if (Object.hasOwnProperty.call(feeds, name)) {
        const tensor = feeds[name];
        tensor.dispose();
      }
    }
  }
}
/**
 * run a single model test case. the inputs/outputs tensors should already been prepared.
 *
 * Records per-run timing into the context's perf data (first run is tracked separately as
 * warm-up) and validates outputs with a backend-specific TensorResultValidator.
 */
export async function runModelTestSet(
  context: ModelTestContext,
  testCase: Test.ModelTestCase,
  testName: string,
): Promise<void> {
  // BUGFIX: a second, less informative "Start to run test data from folder" log line
  // (with only the case name) was emitted right after this one; the duplicate is removed.
  Logger.verbose('TestRunner', `Start to run test data from folder: ${testName}/${testCase.name}`);
  const validator = new TensorResultValidator(context.backend);
  try {
    const feeds: Record<string, ort.Tensor> = {};
    const outputsMetaInfo: Record<string, ort.Tensor> = {};
    testCase.inputs!.forEach((tensor) => (feeds[tensor.name] = tensor));
    testCase.outputs!.forEach((tensor) => (outputsMetaInfo[tensor.name] = tensor));
    const [start, end, outputs] = await sessionRun({
      session: context.session,
      feeds,
      outputsMetaInfo,
      ioBinding: context.ioBinding,
      mlContext: context.mlContext,
    });
    // the first run is recorded as warm-up; later runs feed the perf statistics.
    if (context.perfData.count === 0) {
      context.perfData.firstRun = end - start;
    } else {
      context.perfData.runs.push(end - start);
    }
    context.perfData.count++;
    Logger.verbose('TestRunner', `Finished running model from file: ${testCase.name}`);
    Logger.verbose('TestRunner', ' Stats:');
    Logger.verbose('TestRunner', ` Input(s): ${testCase.inputs!.length}`);
    testCase.inputs!.forEach((i) => {
      Logger.verbose('TestRunner', ` '${i.name}': ${i.type}[${i.dims.join(',')}]`);
    });
    Logger.verbose('TestRunner', ` Output(s): ${Object.keys(outputs).length}`);
    for (const name in outputs) {
      if (Object.hasOwnProperty.call(outputs, name)) {
        const tensor = outputs[name];
        Logger.verbose('TestRunner', ` '${name}': ${tensor.type}[${tensor.dims.join(',')}]`);
      }
    }
    validator.checkNamedTensorResult(outputs, testCase.outputs!);
    Logger.verbose('TestRunner', ' Result: PASS');
  } catch (e) {
    Logger.error('TestRunner', ' Result: FAILED');
    Logger.error('TestRunner', `Failed to run test data from folder: ${testCase.name}. Error: ${inspect(e)}`);
    throw e;
  }
}
/**
 * Build an Operator instance for an op test: collect attributes, create a single-node mock
 * graph for the op type, and let the session handler resolve it against the opset imports.
 */
function initializeOperator(
  sessionHandler: SessionHandler,
  opType: string,
  attributeValues: readonly Test.AttributeValue[],
  opsetImports: readonly Test.OperatorTestOpsetImport[],
): Operator {
  const attributes = new Attribute(undefined);
  for (const { name, type, data } of attributeValues) {
    attributes.set(name, type, data);
  }
  const mockGraph = createMockGraph(opType, attributes);
  return sessionHandler.resolve(mockGraph.getNodes()[0], opsetImports, mockGraph);
}
/**
 * a OpTestContext object contains all states in a OpTest. used for webgl backend.
 */
export class OpTestContext {
  // profiler instance shared across all op tests.
  static profiler = Profiler.create();
  // backend name used for backend resolution; defaults to 'cpu' when the test omits it.
  readonly backendHint: string;
  // set by init(); used to resolve and run the operator.
  sessionHandler: SessionHandler;
  // set by init(); disposed in dispose().
  inferenceHandler: InferenceHandler;
  constructor(protected opTest: Test.OperatorTest) {
    this.backendHint = opTest.backend ?? 'cpu';
  }
  /** Create the Operator under test (opset defaults to domain '' / version 7). Call init() first. */
  createOperator(): Operator {
    return initializeOperator(this.sessionHandler, this.opTest.operator, this.opTest.attributes || [], [
      this.opTest.opset ?? { domain: '', version: 7 },
    ]);
  }
  /** Release the inference handler and the session handler. */
  async dispose(): Promise<void> {
    this.inferenceHandler.dispose();
    this.sessionHandler.dispose();
  }
  /** Resolve the backend and create the session/inference handlers. */
  async init(): Promise<void> {
    const backend = await resolveBackend(this.backendHint);
    this.sessionHandler = backend.createSessionHandler({ profiler: OpTestContext.profiler });
    this.inferenceHandler = this.sessionHandler.createInferenceHandler();
  }
}
/**
* a ProtoOpTestContext uses a protobuf model for operator test. used for ORT based backend.
*/
export class ProtoOpTestContext {
private readonly loadedData: Uint8Array; // model data, inputs, outputs
session: ort.InferenceSession;
readonly backendHint: string;
readonly ioBindingMode: Test.IOBindingMode;
constructor(
test: Test.OperatorTest,
private readonly downloadModel: boolean,
private readonly sessionOptions: ort.InferenceSession.SessionOptions = {},
) {
const opsetImport = onnx.OperatorSetIdProto.create(test.opset);
const operator = test.operator;
const attribute = (test.attributes || []).map((attr) => {
const protoAttr = onnx.AttributeProto.create({ name: attr.name });
switch (attr.type) {
case 'float':
protoAttr.type = onnx.AttributeProto.AttributeType.FLOAT;
protoAttr.f = attr.data as number;
break;
case 'int':
protoAttr.type = onnx.AttributeProto.AttributeType.INT;
protoAttr.i = attr.data as number;
break;
case 'string':
protoAttr.type = onnx.AttributeProto.AttributeType.STRING;
protoAttr.s = new TextEncoder().encode(attr.data as string);
break;
case 'floats':
protoAttr.type = onnx.AttributeProto.AttributeType.FLOATS;
protoAttr.floats = attr.data as number[];
break;
case 'ints':
protoAttr.type = onnx.AttributeProto.AttributeType.INTS;
protoAttr.ints = attr.data as number[];
break;
case 'strings':
protoAttr.type = onnx.AttributeProto.AttributeType.STRINGS;
protoAttr.strings = (attr.data as string[]).map((s) => new TextEncoder().encode(s));
break;
default:
throw new Error(`Unsupported attribute type: ${attr.type}`);
}
return protoAttr;
});
if (test.cases.length === 0) {
throw new Error(`No test cases found for test: ${test.name} [${test.operator}]`);
}
const inputCount = test.cases[0].inputs!.length;
const outputCount = test.cases[0].outputs!.length;
if (
test.cases.some((testCase) => testCase.inputs!.length !== inputCount || testCase.outputs!.length !== outputCount)
) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have the same number of inputs and outputs`,
);
}
const inputsOmitted = test.cases[0].inputs.map((input) => !input.data);
const outputsOmitted = test.cases[0].outputs.map((output) => !output.data);
for (let caseIndex = 1; caseIndex < test.cases.length; caseIndex++) {
const testCase = test.cases[caseIndex];
for (let i = 0; i < inputCount; i++) {
if (inputsOmitted[i] !== !testCase.inputs![i].data) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have consistent inputs data availability. Data of input[${i}] in testCase #0 and #${caseIndex} should be both available or both omitted.`,
);
}
}
for (let i = 0; i < outputCount; i++) {
if (outputsOmitted[i] !== !testCase.outputs![i].data) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have consistent outputs data availability. Data of output[${i}] in testCase #0 and #${caseIndex} should be both available or both omitted.`,
);
}
}
}
const model = onnx.ModelProto.create();
model.irVersion = onnx.Version.IR_VERSION;
model.opsetImport.push(opsetImport);
model.graph = onnx.GraphProto.create();
model.graph.node = [
onnx.NodeProto.create({
input: test.cases[0].inputs!.map((t, i) => (t.data ? `input_${i}` : '')),
output: test.cases[0].outputs!.map((t, i) => (t.data ? `output_${i}` : '')),
opType: operator,
domain: test.opset?.domain,
name: operator,
attribute,
}),
];
// normalize input shape definitions
let normalizedInputShapeDefinitions: ReadonlyArray<Test.InputShapeDefinition | undefined>;
if (!test.inputShapeDefinitions || test.inputShapeDefinitions === 'none') {
// if inputShapeDefinitions is not specified, use undefined for all inputs
normalizedInputShapeDefinitions = new Array(inputCount).fill(undefined);
} else if (test.inputShapeDefinitions === 'rankOnly') {
// check if all test cases have data
if (test.cases.some((testCase) => testCase.inputs!.some((input) => !input.data || !input.dims))) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have data for each inputs when inputShapeDefinitions is 'rankOnly'`,
);
}
// if inputShapeDefinitions is 'rankOnly', use semantic names for all inputs. This means only rank is specified.
normalizedInputShapeDefinitions = test.cases[0].inputs!.map((input: Test.TensorValue, i) =>
input.dims.map((_, j) => `_input_${i}_d${j}`),
);
// check if all test cases have the same rank for each inputs
if (
test.cases.some((testCase) =>
testCase.inputs!.some(
(input: Test.TensorValue, i) =>
input.dims.length !== (test.cases[0].inputs![i] as Test.TensorValue).dims.length,
),
)
) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have the same rank for each inputs in different test cases`,
);
}
} else if (test.inputShapeDefinitions === 'static') {
// check if all test cases have data
if (test.cases.some((testCase) => testCase.inputs!.some((input) => !input.data || !input.dims))) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have data for each inputs when inputShapeDefinitions is 'rankOnly'`,
);
}
// if inputShapeDefinitions is 'static', use the shape of the first test case for all inputs.
normalizedInputShapeDefinitions = test.cases[0].inputs!.map((input: Test.TensorValue) => input.dims);
// check if all test cases have the same shape for each inputs
if (
test.cases.some((testCase) =>
testCase.inputs!.some((input: Test.TensorValue, i) =>
TensorResultValidator.integerEqual(input.dims, (test.cases[0].inputs![i] as Test.TensorValue).dims),
),
)
) {
throw new Error(
`Test cases for test: ${test.name} [${test.operator}] must have the same shape for each inputs in different test cases`,
);
}
} else {
// if inputShapeDefinitions is specified as an array, use it as is.
// check if inputShapeDefinitions has the same number of inputs as test cases
if (test.inputShapeDefinitions && test.inputShapeDefinitions.length !== inputCount) {
throw new Error(
`Input shape definitions for test: ${test.name} [${test.operator}] must have the same number of inputs`,
);
}
normalizedInputShapeDefinitions = test.inputShapeDefinitions;
}
model.graph.input = test.cases[0]
.inputs!.map((input, i) => {
const shapeDefinition = normalizedInputShapeDefinitions[i];
const shape = shapeDefinition
? onnx.TensorShapeProto.create({
dim: shapeDefinition.map((dim) =>
onnx.TensorShapeProto.Dimension.create(typeof dim === 'string' ? { dimParam: dim } : { dimValue: dim }),
),
})
: undefined;
return onnx.ValueInfoProto.create({
name: `input_${i}`,
type: onnx.TypeProto.create({
tensorType: onnx.TypeProto.Tensor.create({ elemType: tensorDataTypeStringToEnum(input.type), shape }),
}),
});
})
.filter((_, i) => test.cases[0].inputs![i].data);
model.graph.output = test.cases[0]
.outputs!.map((output, i) =>
onnx.ValueInfoProto.create({
name: `output_${i}`,
type: onnx.TypeProto.create({
tensorType: onnx.TypeProto.Tensor.create({ elemType: tensorDataTypeStringToEnum(output.type) }),
}),
}),
)
.filter((_, i) => test.cases[0].outputs![i].data);
model.graph.name = test.name;
this.backendHint = test.backend!;
this.ioBindingMode = test.ioBinding;
this.loadedData = onnx.ModelProto.encode(model).finish().slice();
if (this.downloadModel) {
const modelFile = new File([this.loadedData], `op_test_generated_model_${test.name}.onnx`, {
type: 'application/octet-stream',
});
const modelTempUrl = URL.createObjectURL(modelFile);
const a = document.createElement('a');
a.href = modelTempUrl;
a.download = modelFile.name;
a.target = '_blank';
a.click();
URL.revokeObjectURL(modelTempUrl);
}
}
async init(): Promise<void> {
this.session = await ort.InferenceSession.create(this.loadedData, {
executionProviders: [this.backendHint],
preferredOutputLocation: this.ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
...this.sessionOptions,
});
}
async dispose(): Promise<void> {
await this.session.release();
}
}
/**
 * Runs a single operator test case through an already-initialized inference session.
 *
 * Builds feeds/fetches from the test case (skipping omitted inputs/outputs, which correspond
 * to empty-name tensors in the generated model), executes the session, and validates the
 * actual outputs against the expected tensors.
 *
 * @param session - inference session created from the generated op-test model.
 * @param testCase - input/output data for one test case.
 * @param ioBindingMode - how inputs/outputs are bound (e.g. 'gpu-location' keeps outputs on GPU).
 * @param validator - validator configured with the backend's numeric tolerance.
 */
async function runProtoOpTestcase(
  session: ort.InferenceSession,
  testCase: Test.OperatorTestCase,
  ioBindingMode: Test.IOBindingMode,
  validator: TensorResultValidator,
): Promise<void> {
  const feeds: Record<string, ort.Tensor> = {};
  const fetches: Record<string, Pick<ort.Tensor, 'dims' | 'type'>> = {};
  // Converts the JSON number[] test data into the typed buffer each tensor type requires.
  const createTensor = (type: ort.Tensor.Type, data: number[], dims: readonly number[]): ort.Tensor => {
    let buffer: number[] | BigUint64Array | BigInt64Array | Uint16Array | Uint8Array = data;
    if (type === 'uint64') {
      buffer = BigUint64Array.from(data.map(BigInt));
    } else if (type === 'int64') {
      buffer = BigInt64Array.from(data.map(BigInt));
    } else if (type === 'float16') {
      // Re-interpret the half-precision bit patterns as raw uint16 words.
      const dataArr = Float16ArrayPolyfill.from(data);
      buffer = new Uint16Array(dataArr.buffer, dataArr.byteOffset, dataArr.byteLength / 2);
    } else if (type === 'uint4' || type === 'int4') {
      buffer = new Uint8Array(calculateTensorSizeInBytes(tensorDataTypeStringToEnum(type), dims)!);
      // Pack two 4-bit elements per byte: even index -> low nibble, odd index -> high nibble.
      for (let j = 0; j < data.length; j++) {
        /* eslint-disable no-bitwise */
        const byteIndex = j >> 1;
        const bitOffset = (j & 1) << 2;
        // Mask to 4 bits so negative int4 values (two's complement, e.g. -1) don't smear
        // their sign bits across the whole byte and clobber the neighboring nibble.
        buffer[byteIndex] |= (data[j] & 0xf) << bitOffset;
        /* eslint-enable no-bitwise */
      }
    }
    return new ort.Tensor(type, buffer, dims);
  };
  testCase.inputs.forEach((input, i) => {
    // Omitted inputs (no data) are not fed; the model declares them with an empty name.
    if (input.data) {
      feeds[`input_${i}`] = createTensor(input.type, input.data, input.dims);
    }
  });
  const outputs: ort.Tensor[] = [];
  const expectedOutputNames: string[] = [];
  testCase.outputs.forEach((output, i) => {
    if (output.data) {
      outputs.push(createTensor(output.type, output.data, output.dims));
      expectedOutputNames.push(`output_${i}`);
      fetches[`output_${i}`] = { dims: output.dims, type: output.type };
    }
  });
  const [, , results] = await sessionRun({ session, feeds, outputsMetaInfo: fetches, ioBinding: ioBindingMode });
  // The session must produce exactly the expected set of output names.
  const actualOutputNames = Object.getOwnPropertyNames(results);
  expect(actualOutputNames.length).to.equal(expectedOutputNames.length);
  expect(actualOutputNames).to.have.members(expectedOutputNames);
  const actualOutputs = actualOutputNames.map((name) => results[name]);
  validator.checkApiTensorResult(actualOutputs, outputs);
}
/**
 * Creates an onnxjs Tensor of the given dims/type and copies `data` element-wise
 * into its backing store.
 */
function createTensor(dims: number[], type: Tensor.DataType, data: number[]): Tensor {
  const tensor = new Tensor(dims, type);
  data.forEach((value, index) => {
    tensor.data[index] = value;
  });
  return tensor;
}
/**
 * Runs a single operator test case directly through a (legacy onnxjs) operator
 * implementation, then validates the produced tensors against the expected outputs.
 *
 * @param inferenceHandler - backend inference handler passed to the operator implementation.
 * @param operator - operator under test; invoked via `operator.impl`.
 * @param testcase - input/output data for one test case.
 * @param validator - validator configured with the backend's numeric tolerance.
 */
async function runOpTestcase(
  inferenceHandler: InferenceHandler,
  operator: Operator,
  testcase: Test.OperatorTestCase,
  validator: TensorResultValidator,
): Promise<void> {
  const inputValues = testcase.inputs as Test.TensorValue[];
  for (const [i, input] of inputValues.entries()) {
    Logger.verbose('TestOpRunner', ` Input '${i}': ${input.type}[${input.dims.join(',')}]`);
  }
  const inputTensors = inputValues.map((input) => createTensor(input.dims, input.type as Tensor.DataType, input.data));
  const results = operator.impl(inferenceHandler, inputTensors, operator.context);
  // Try an async data read on every result; failures are deliberately ignored (best-effort).
  for (const result of results) {
    try {
      await result.getData();
    } catch {
      // ignored
    }
  }
  for (const [i, output] of results.entries()) {
    Logger.verbose('TestOpRunner', ` Result'${i}': ${output.type}[${output.dims.join(',')}]`);
  }
  const expectedTensors = (testcase.outputs as Test.TensorValue[]).map((output) =>
    createTensor(output.dims, output.type as Tensor.DataType, output.data),
  );
  validator.checkTensorResult(results, expectedTensors);
}
/**
 * Runs a single operator test case, dispatching on the context type: a
 * `ProtoOpTestContext` runs through a full InferenceSession, while an
 * `OpTestContext` invokes the (legacy onnxjs) operator implementation directly.
 */
export async function runOpTest(
  testcase: Test.OperatorTestCase,
  context: ProtoOpTestContext | OpTestContext,
): Promise<void> {
  if (context instanceof ProtoOpTestContext) {
    const { session, ioBindingMode, backendHint } = context;
    await runProtoOpTestcase(session, testcase, ioBindingMode, new TensorResultValidator(backendHint));
  } else {
    await runOpTestcase(
      context.inferenceHandler,
      context.createOperator(),
      testcase,
      new TensorResultValidator(context.backendHint),
    );
  }
}