// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import {expect} from 'chai';
import * as ort from 'onnxruntime-common';
import {extname} from 'path';
import {inspect} from 'util';

import {Attribute} from '../lib/onnxjs/attribute';
import {InferenceHandler, resolveBackend, SessionHandler} from '../lib/onnxjs/backend';
import {createWebGLContext} from '../lib/onnxjs/backends/webgl/webgl-context-factory';
import {Logger, Profiler} from '../lib/onnxjs/instrument';
import {Operator} from '../lib/onnxjs/operators';
import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
import {Tensor} from '../lib/onnxjs/tensor';
import {ProtoUtil} from '../lib/onnxjs/util';
import {createView} from '../lib/wasm/jsep/tensor-view';
import {getTensorElementSize, isGpuBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';

import {base64toBuffer, createMockGraph, readFile} from './test-shared';
import {Test} from './test-types';

// the thresholds that are used to compare 2 float numbers. See TensorResultValidator.floatEqual() below.
const CPU_THRESHOLD_ABSOLUTE_ERROR = 1.0e-4;
const CPU_THRESHOLD_RELATIVE_ERROR = 1.000001;
const WEBGL_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const WEBGL_THRESHOLD_RELATIVE_ERROR = 1.00001;
const WEBGL_HALF_FLOAT_THRESHOLD_ABSOLUTE_ERROR = 0.1;
const WEBGL_HALF_FLOAT_THRESHOLD_RELATIVE_ERROR = 1.02;
const WEBGPU_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const WEBGPU_THRESHOLD_RELATIVE_ERROR = 1.00001;
const WASM_THRESHOLD_ABSOLUTE_ERROR = 1.0e-4;
const WASM_THRESHOLD_RELATIVE_ERROR = 1.000001;
const ONNXRUNTIME_THRESHOLD_ABSOLUTE_ERROR = 1.0e-3;
const ONNXRUNTIME_THRESHOLD_RELATIVE_ERROR = 1.00001;

/**
 * returns a number to represent the current timestamp in a resolution as high as possible.
 */
const now = (typeof performance !== 'undefined' && performance.now) ? () => performance.now() : Date.now;

/**
 * wrap the dims, type and data of an API tensor (ort.Tensor) into an internal onnxjs Tensor.
 */
function toInternalTensor(tensor: ort.Tensor): Tensor {
  return new Tensor(
      tensor.dims, tensor.type as Tensor.DataType, undefined, undefined, tensor.data as Tensor.NumberType);
}

/**
 * create an API tensor (ort.Tensor) from the type, data and dims of an internal onnxjs Tensor.
 */
function fromInternalTensor(tensor: Tensor): ort.Tensor {
  return new ort.Tensor(tensor.type, tensor.data as ort.Tensor.DataType, tensor.dims);
}

/**
 * decode an onnx.TensorProto (from a file URI or from an in-memory buffer) into a named tensor.
 *
 * @param uriOrData - a path that is read with readFile(), or the already loaded bytes.
 * @param allowInt64 - when true, INT64/UINT64 protos are decoded into BigInt64Array/BigUint64Array. Otherwise all
 *     protos go through Tensor.fromProto().
 * @returns the decoded tensor with an extra 'name' property copied from the proto.
 */
async function loadTensorProto(uriOrData: string|Uint8Array, allowInt64 = false): Promise<Test.NamedTensor> {
  const buf = (typeof uriOrData === 'string') ? await readFile(uriOrData) : uriOrData;
  const tensorProto = onnx.TensorProto.decode(buf);

  let tensor: ort.Tensor;

  // by default, we don't allow (u)int64. this is for backward compatibility.
  if (allowInt64 && tensorProto && tensorProto.dataType &&
      ((tensorProto.dataType === onnx.TensorProto.DataType.INT64 ||
        tensorProto.dataType === onnx.TensorProto.DataType.UINT64))) {
    const signed = tensorProto.dataType === onnx.TensorProto.DataType.INT64;
    const dataConstructor = signed ? BigInt64Array : BigUint64Array;

    const rawData = tensorProto.rawData;
    const hasRawData = rawData && typeof rawData.byteLength === 'number' && rawData.byteLength > 0;
    const typedData = (signed ? tensorProto.int64Data : tensorProto.uint64Data) ?? [];

    // the element count must come from whichever field actually carries the data. deriving it from rawData only
    // would produce an empty tensor when the values are stored in int64Data / uint64Data.
    const length = hasRawData ? rawData.byteLength / 8 : typedData.length;
    const data = new dataConstructor(length);

    if (hasRawData) {
      // rawData is read as little-endian 8-byte integers
      const dataSource = new DataView(rawData.buffer, rawData.byteOffset, rawData.byteLength);
      for (let i = 0; i < length; i++) {
        data[i] = signed ? dataSource.getBigInt64(i * 8, true) : dataSource.getBigUint64(i * 8, true);
      }
    } else {
      for (let i = 0; i < length; i++) {
        data[i] = BigInt(typedData[i].toString());
      }
    }
    tensor = new ort.Tensor(signed ? 'int64' : 'uint64', data, ProtoUtil.tensorDimsFromProto(tensorProto.dims));
  } else {
    const internalTensor = Tensor.fromProto(tensorProto);
    tensor = fromInternalTensor(internalTensor);
  }
  // add property 'name' to the tensor object.
  const namedTensor = tensor as unknown as Test.NamedTensor;
  namedTensor.name = tensorProto.name;
  return namedTensor;
}

/**
 * placeholder loader for '.tpb' files. always rejects with 'not supported'.
 */
async function loadMlProto(_uriOrData: string|Uint8Array): Promise<Test.NamedTensor> {
  return Promise.reject('not supported');
}

/**
 * load all data files of a test case and store the resulting tensors into testCase.inputs / testCase.outputs.
 *
 * A data file whose base name contains 'input' is treated as an input, one that contains 'output' as an output.
 *
 * @param modelMetaData - provides the input/output names of the model (a session object can be passed).
 * @param testCase - the test case whose dataFiles are loaded. its inputs and outputs are overwritten.
 * @param backendName - name of the backend. (u)int64 decoding is enabled for 'wasm', 'xnnpack' and 'webgpu'.
 * @param fileCache - optional preloaded file contents, looked up by data file path.
 * @throws Error if a data file has an extension other than '.pb' / '.tpb'.
 */
async function loadTensors(
    modelMetaData: {inputNames: readonly string[]; outputNames: readonly string[]}, testCase: Test.ModelTestCase,
    backendName: string, fileCache?: FileCacheBuffer) {
  const inputs: Test.NamedTensor[] = [];
  const outputs: Test.NamedTensor[] = [];
  let dataFileType: 'none'|'pb'|'npy' = 'none';

  const allowInt64 = ['wasm', 'xnnpack', 'webgpu'].includes(backendName);

  for (const dataFile of testCase.dataFiles) {
    const ext = extname(dataFile);
    if (ext.toLowerCase() === '.pb' || ext.toLowerCase() === '.tpb') {
      if (dataFileType === 'none') {
        dataFileType = 'pb';
      }
      if (dataFileType !== 'pb') {
        throw new Error(`cannot load data from test case "${testCase.name}", multiple types of files detected`);
      }

      const uriOrData = fileCache && fileCache[dataFile] ? fileCache[dataFile] : dataFile;
      const t = ext.toLowerCase() === '.pb' ? await loadTensorProto(uriOrData, allowInt64) :  // onnx.TensorProto
                                              await loadMlProto(uriOrData);

      const dataFileBasename = dataFile.split(/[/\\]/).pop()!;

      if (dataFileBasename.indexOf('input') !== -1) {
        inputs.push(t);
      } else if (dataFileBasename.indexOf('output') !== -1) {
        outputs.push(t);
      }
    } else {
      throw new Error(`${ext} file is not supported now`);
    }
  }

  // if model has single input/output, and tensor name is empty, we assign model's input/output names to it.
  if (modelMetaData.inputNames.length === 1 && inputs.length === 1 && !inputs[0].name) {
    inputs[0].name = modelMetaData.inputNames[0];
  }
  if (modelMetaData.outputNames.length === 1 && outputs.length === 1 && !outputs[0].name) {
    outputs[0].name = modelMetaData.outputNames[0];
  }

  testCase.inputs = inputs;
  testCase.outputs = outputs;
}

/**
 * create an inference session for the given model.
 *
 * @param modelFilePath - path of the model. if the file cache holds an entry for it, the cached bytes are used.
 * @param backendHint - the execution provider config, used as the only entry of 'executionProviders'.
 * @param ioBindingMode - 'gpu-location' sets preferredOutputLocation to 'gpu-buffer'.
 * @param profile - whether to enable profiling and start it right after the session is created.
 * @param sessionOptions - extra session options. the fields set by this function take precedence.
 * @param fileCache - optional preloaded file contents, looked up by model file path.
 * @throws rethrows whatever ort.InferenceSession.create() throws, after logging it.
 */
async function initializeSession(
    modelFilePath: string, backendHint: ort.InferenceSession.ExecutionProviderConfig, ioBindingMode: Test.IOBindingMode,
    profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions,
    fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
  const preloadModelData: Uint8Array|undefined =
      fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined;
  Logger.verbose(
      'TestRunner',
      `Start to load model from file: ${modelFilePath}${
          preloadModelData ? ` [preloaded(${preloadModelData.byteLength})]` : ''}`);

  const profilerConfig = profile ? {maxNumberEvents: 65536} : undefined;
  const sessionConfig = {
    ...sessionOptions,
    executionProviders: [backendHint],
    profiler: profilerConfig,
    enableProfiling: profile,
    preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined
  };

  let session: ort.InferenceSession;

  try {
    if (preloadModelData) {
      session = await ort.InferenceSession.create(preloadModelData, sessionConfig);
    } else {
      session = await ort.InferenceSession.create(modelFilePath, sessionConfig);
    }
  } catch (e) {
    Logger.error(
        'TestRunner',
        `Failed to load model from file: ${modelFilePath}. ` +
            `Error: ${e.message} @ ${e.fileName}:${e.lineNumber}`);
    throw e;
  }

  if (profile) {
    session.startProfiling();
  }

  Logger.verbose('TestRunner', `Finished loading model from file: ${modelFilePath}`);

  return session;
}

// maps a file path to the decoded content of that file
type FileCacheBuffer = {
  [filePath: string]: Uint8Array;
};

/**
 * a ModelTestContext object contains all states in a ModelTest
 */
export class ModelTestContext {
  private constructor(
      readonly session: ort.InferenceSession,
      readonly backend: string,
      readonly perfData: ModelTestContext.ModelTestPerfData,
      readonly ioBinding: Test.IOBindingMode,
      private readonly profile: boolean,
  ) {}

  /**
   * dump the current performance data
   */
  private logPerfData() {
    const data = this.perfData;
    Logger.verbose('TestRunner.Perf', '***Perf Data Start');
    Logger.verbose('TestRunner.Perf', ` * Init          : ${data.init}`);
    Logger.verbose('TestRunner.Perf', ` * Running times : ${data.count}`);
    Logger.verbose('TestRunner.Perf', ` * FirstRun      : ${data.firstRun.toFixed(2)}`);
    const runs = data.runs;
    if (runs.length > 0) {
      Logger.verbose('TestRunner.Perf', ` * Runs          : ${runs.map(r => r.toFixed(2)).join(', ')}`);

      if (runs.length > 1) {
        // sort a copy so that the recorded order of perfData.runs is not changed by logging
        const sorted = [...runs].sort((a, b) => a - b);
        Logger.verbose('TestRunner.Perf', ` * Runs P50      : ${sorted[Math.floor((runs.length - 1) / 2)].toFixed(2)}`);
        const avg = runs.reduce((prev, current) => prev + current, 0) / runs.length;
        Logger.verbose('TestRunner.Perf', ` * Runs Avg      : ${avg.toFixed(2)}`);
        // start from 0: without an initial value the first sample would be added as-is instead of as a squared
        // deviation.
        const sumOfSquaredDeviations = runs.reduce((prev, current) => prev + (current - avg) * (current - avg), 0);
        // sample standard deviation (n - 1 in the denominator)
        const sd = Math.sqrt(sumOfSquaredDeviations / (runs.length - 1));
        Logger.verbose('TestRunner.Perf', ` * Runs SD       : ${sd.toFixed(2)}`);
      }
    }
    Logger.verbose('TestRunner.Perf', '***Perf Data End');
  }

  /**
   * end profiling (if it was enabled), log the performance data and release the session.
   */
  async release(): Promise<void> {
    if (this.profile) {
      this.session.endProfiling();
    }
    this.logPerfData();
    await this.session.release();
  }

  /**
   * create a ModelTestContext object that used in every test cases in the given ModelTest.
   *
   * Only one creation may be in flight at a time; a second call made before the first one finishes throws.
   */
  static async create(modelTest: Test.ModelTest, profile: boolean, testOptions?: Test.Options):
      Promise<ModelTestContext> {
    if (this.initializing) {
      throw new Error('cannot create a ModelTestContext object when the previous creation is not done');
    }

    try {
      this.initializing = true;

      const initStart = now();
      const executionProviderConfig =
          modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
      const session = await initializeSession(
          modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, testOptions?.sessionOptions || {},
          this.cache);
      const initEnd = now();

      // the session provides inputNames / outputNames, which is all loadTensors() needs as model meta data
      for (const testCase of modelTest.cases) {
        await loadTensors(session, testCase, modelTest.backend!, this.cache);
      }

      return new ModelTestContext(
          session,
          modelTest.backend!,
          {init: initEnd - initStart, firstRun: -1, runs: [], count: 0},
          modelTest.ioBinding,
          profile,
      );
    } finally {
      this.initializing = false;
    }
  }

  /**
   * set the global file cache for looking up model and tensor protobuf files.
   */
  static setCache(cache: Test.FileCache): void {
    const keys = Object.keys(cache);
    Logger.info('TestRunner', `Setting up file cache... Entry count: ${keys.length}.`);
    for (const key of keys) {
      this.cache[key] = base64toBuffer(cache[key]);
    }
  }

  private static initializing = false;
  private static cache: FileCacheBuffer = {};
}

export declare namespace ModelTestContext {
  export interface ModelTestPerfData {
    init: number;
    firstRun: number;
    runs: number[];
    count: number;
  }
}

/**
 * compares actual tensors against expected tensors, using error thresholds that depend on the backend.
 */
export class TensorResultValidator {
  private readonly absoluteThreshold: number;
  private readonly relativeThreshold: number;
  private readonly maxFloatValue: number = 3.4028234663852886e+38;

  // cached result of the WebGL float32 render support check. undefined means "not checked yet".
  private static isHalfFloat: boolean|undefined;

  /**
   * @param backend - name of the backend, which selects the thresholds.
   * @throws Error if the backend name is not one of the known backends.
   */
  constructor(backend: string) {
    if (backend === 'cpu') {
      this.absoluteThreshold = CPU_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = CPU_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'webgl') {
      if (TensorResultValidator.isHalfFloat === undefined) {
        TensorResultValidator.isHalfFloat = !createWebGLContext(ort.env.webgl.contextId).isRenderFloat32Supported;
      }
      if (TensorResultValidator.isHalfFloat) {
        this.maxFloatValue = 65504;
        this.absoluteThreshold = WEBGL_HALF_FLOAT_THRESHOLD_ABSOLUTE_ERROR;
        this.relativeThreshold = WEBGL_HALF_FLOAT_THRESHOLD_RELATIVE_ERROR;
      } else {
        this.absoluteThreshold = WEBGL_THRESHOLD_ABSOLUTE_ERROR;
        this.relativeThreshold = WEBGL_THRESHOLD_RELATIVE_ERROR;
      }
    } else if (backend === 'webgpu') {
      this.absoluteThreshold = WEBGPU_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = WEBGPU_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'wasm' || backend === 'xnnpack' || backend === 'webnn') {
      this.absoluteThreshold = WASM_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = WASM_THRESHOLD_RELATIVE_ERROR;
    } else if (backend === 'onnxruntime') {
      this.absoluteThreshold = ONNXRUNTIME_THRESHOLD_ABSOLUTE_ERROR;
      this.relativeThreshold = ONNXRUNTIME_THRESHOLD_RELATIVE_ERROR;
    } else {
      throw new Error(`backend not supported: ${backend}`);
    }
  }

  /**
   * assert that both lists have the same length and that each pair of tensors matches (see areEqual()).
   */
  checkTensorResult(actual: Tensor[], expected: Tensor[]): void {
    // check output size
    expect(actual.length, 'size of output tensors').to.equal(expected.length);

    // compare output one-by-one
    for (let i = 0; i < actual.length; ++i) {
      const match = this.areEqual(actual[i], expected[i]);
      if (!match) {
        Logger.error(
            'TestRunner',
            `Tensor mismatch: \nACTUAL: type=${actual[i].type}; dims=[${actual[i].dims}]; data=[${
                actual[i].data}]\nEXPECT: type=${expected[i].type}; dims=[${expected[i].dims}]; data=[${
                expected[i].data}]`);
      }
      expect(match, 'tensor data should match').to.be.true;
    }
  }

  /**
   * same as checkTensorResult(), for API tensors.
   */
  checkApiTensorResult(actual: ort.Tensor[], expected: ort.Tensor[]): void {
    this.checkTensorResult(actual.map(toInternalTensor), expected.map(toInternalTensor));
  }

  /**
   * check a name-to-tensor map against a list of expected named tensors.
   */
  checkNamedTensorResult(actual: Record<string, ort.Tensor>, expected: Test.NamedTensor[]): void {
    // check output size
    expect(Object.getOwnPropertyNames(actual).length, 'size of output tensors').to.equal(expected.length);

    // check output mapping
    for (const expectedOneOutput of expected) {
      expect(actual, 'keys of output tensors').to.contain.keys(expectedOneOutput.name);
    }

    this.checkApiTensorResult(expected.map(i => actual[i.name]!), expected);
  }

  // This function check whether 2 tensors should be considered as 'match' or not
  areEqual(actual: Tensor, expected: Tensor): boolean {
    if (!actual || !expected) {
      return false;
    }
    if (!actual.dims || !expected.dims) {
      return false;
    }

    const actualDims = actual.dims;
    const actualType = actual.type;
    const expectedDims = expected.dims;
    const expectedType = expected.type;

    if (actualType !== expectedType) {
      return false;
    }
    if (actualDims.length !== expectedDims.length) {
      return false;
    }

    for (let i = 0; i < actualDims.length; i++) {
      if (actualDims[i] !== expectedDims[i]) {
        return false;
      }
    }

    switch (actualType) {
      case 'string':
        return this.strictEqual(actual.stringData, expected.stringData);

      case 'float32':
      case 'float64':
        return this.floatEqual(
            actual.numberData as number[] | Float32Array | Float64Array,
            expected.numberData as number[] | Float32Array | Float64Array);

      case 'uint8':
      case 'int8':
      case 'uint16':
      case 'int16':
      case 'int32':
      case 'uint32':
      case 'int64':
      case 'bool':
        return TensorResultValidator.integerEqual(
            actual.numberData as number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array |
                Int32Array,
            expected.numberData as number[] | Uint8Array | Int8Array | Uint16Array | Int16Array | Uint32Array |
                Int32Array);

      default:
        throw new Error('type not implemented or not supported');
    }
  }

  /**
   * deep equality check (chai 'deep.equal') that returns a boolean instead of throwing.
   */
  strictEqual<T>(actual: T, expected: T): boolean {
    try {
      expect(actual).to.deep.equal(expected);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * element-wise comparison of 2 float arrays. A pair of elements matches if it passes either the absolute or the
   * relative error check. 2 NaN values are treated as equal.
   */
  floatEqual(actual: number[]|Float32Array|Float64Array, expected: number[]|Float32Array|Float64Array): boolean {
    if (actual.length !== expected.length) {
      return false;
    }

    for (let i = actual.length - 1; i >= 0; i--) {
      const a = actual[i];
      let b = expected[i];

      if (a === b) {
        continue;  // exact the same value, treat as equal
      }

      // check for NaN
      //
      if (Number.isNaN(a) && Number.isNaN(b)) {
        continue;  // 2 numbers are NaN, treat as equal
      }
      if (Number.isNaN(a) || Number.isNaN(b)) {
        Logger.error('Validator', `a or b isNan -- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
        return false;  // one is NaN and the other is not
      }

      // check for Infinity
      //
      if (!Number.isFinite(a) || !Number.isFinite(b)) {
        Logger.error('Validator', `a or b is Infinity -- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
        return false;  // at least one is Infinity and the other is not or their sign is different
      }

      // normalize value of b
      b = Math.max(Math.min(expected[i], this.maxFloatValue), -this.maxFloatValue);

      // Comparing 2 float numbers: (Suppose a >= b)
      //
      // if ( a - b < ABSOLUTE_ERROR || 1.0 < a / b < RELATIVE_ERROR)
      //   test pass
      // else
      //   test fail
      // endif
      //
      if (Math.abs(actual[i] - expected[i]) < this.absoluteThreshold) {
        continue;  // absolute error check pass
      }
      if (a !== 0 && b !== 0 && a / b < this.relativeThreshold && b / a < this.relativeThreshold) {
        continue;  // relative error check pass
      }

      // if code goes here, it means both (abs/rel) check failed.
      Logger.error('Validator', `abs/rel check failed-- index:${i}: actual=${actual[i]},expected=${expected[i]}`);
      return false;
    }

    return true;
  }

  /**
   * element-wise strict equality check of 2 integer arrays.
   */
  static integerEqual(
      actual: number[]|Uint8Array|Int8Array|Uint16Array|Int16Array|Uint32Array|Int32Array,
      expected: number[]|Uint8Array|Int8Array|Uint16Array|Int16Array|Uint32Array|Int32Array): boolean {
    if (actual.length !== expected.length) {
      return false;
    }

    for (let i = actual.length - 1; i >= 0; i--) {
      if (actual[i] !== expected[i]) {
        return false;
      }
    }

    return true;
  }
}

/**
 * create a GPU tensor that holds a copy of the data of the given CPU tensor.
 *
 * @throws Error if the tensor type is not supported for GPU buffers or the data is a plain array.
 */
function createGpuTensorForInput(cpuTensor: ort.Tensor): ort.Tensor {
  if (!isGpuBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
    throw new Error(`createGpuTensorForInput can not work with ${cpuTensor.type} tensor`);
  }
  const device = ort.env.webgpu.device as GPUDevice;
  const gpuBuffer = device.createBuffer({
    // eslint-disable-next-line no-bitwise
    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
    // buffer size is rounded up to a multiple of 16 bytes
    size: Math.ceil(cpuTensor.data.byteLength / 16) * 16,
    mappedAtCreation: true
  });
  const arrayBuffer = gpuBuffer.getMappedRange();
  new Uint8Array(arrayBuffer)
      .set(new Uint8Array(cpuTensor.data.buffer, cpuTensor.data.byteOffset, cpuTensor.data.byteLength));
  gpuBuffer.unmap();

  // TODO: how to "await" for the copy to finish, so that we can get more accurate performance data?

  return ort.Tensor.fromGpuBuffer(
      gpuBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => gpuBuffer.destroy()});
}

/**
 * create a pre-allocated GPU tensor of the given type and dims, to be used as an output of a session run.
 * The tensor's download() copies the buffer into a staging buffer and returns a typed view of the first
 * `size` bytes.
 *
 * @throws Error if the tensor type is not supported for GPU buffers.
 */
function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
  if (!isGpuBufferSupportedType(type)) {
    throw new Error(`createGpuTensorForOutput can not work with ${type} tensor`);
  }

  const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(type))!;
  const size = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;

  const device = ort.env.webgpu.device as GPUDevice;
  const gpuBuffer = device.createBuffer({
    // eslint-disable-next-line no-bitwise
    usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
    // buffer size is rounded up to a multiple of 16 bytes
    size: Math.ceil(size / 16) * 16
  });

  return ort.Tensor.fromGpuBuffer(gpuBuffer, {
    dataType: type,
    dims,
    dispose: () => gpuBuffer.destroy(),
    download: async () => {
      const stagingBuffer = device.createBuffer({
        // eslint-disable-next-line no-bitwise
        usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
        size: gpuBuffer.size
      });
      const encoder = device.createCommandEncoder();
      encoder.copyBufferToBuffer(gpuBuffer, 0, stagingBuffer, 0, gpuBuffer.size);
      device.queue.submit([encoder.finish()]);

      await stagingBuffer.mapAsync(GPUMapMode.READ);
      // slice() copies the bytes, so the staging buffer can be destroyed right away. the padding is dropped.
      const arrayBuffer = stagingBuffer.getMappedRange().slice(0, size);
      stagingBuffer.destroy();

      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.GpuBufferDataTypes];
    }
  });
}

/**
 * run the session once and return the timestamps taken right before and right after the run, and the outputs.
 *
 * Note: when inputs are uploaded to GPU, the entries of options.feeds are replaced by GPU tensors. All tensors in
 * options.feeds are disposed before this function returns or throws.
 *
 * @returns a tuple of [start timestamp, end timestamp, outputs].
 */
export async function sessionRun(options: {
  session: ort.InferenceSession; feeds: Record<string, ort.Tensor>;
  outputsMetaInfo: Record<string, Pick<ort.Tensor, 'dims'|'type'>>;
  ioBinding: Test.IOBindingMode;
}): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> {
  const session = options.session;
  const feeds = options.feeds;
  const fetches: Record<string, ort.Tensor> = {};

  // currently we only support IO Binding for WebGPU
  //
  // For inputs, we create GPU tensors on both 'gpu-tensor' and 'gpu-location' binding testing mode.
  // For outputs, we create GPU tensors on 'gpu-tensor' binding testing mode only.
  //              in 'gpu-location' binding mode, outputs are not pre-allocated.
  const shouldUploadInput = options.ioBinding === 'gpu-tensor' || options.ioBinding === 'gpu-location';
  const shouldUploadOutput = options.ioBinding === 'gpu-tensor';
  try {
    if (shouldUploadInput) {
      // replace the CPU tensors in feeds into GPU tensors
      for (const name in feeds) {
        if (Object.hasOwnProperty.call(feeds, name)) {
          feeds[name] = createGpuTensorForInput(feeds[name]);
        }
      }
    }

    if (shouldUploadOutput) {
      for (const name in options.outputsMetaInfo) {
        if (Object.hasOwnProperty.call(options.outputsMetaInfo, name)) {
          const {type, dims} = options.outputsMetaInfo[name];
          fetches[name] = createGpuTensorForOutput(type, dims);
        }
      }
    }

    const start = now();
    Logger.verbose('TestRunner', `Timestamp before session run: ${start}`);
    const outputs = await (
        shouldUploadOutput ? session.run(feeds, fetches) :
                             session.run(feeds, Object.getOwnPropertyNames(options.outputsMetaInfo)));
    const end = now();
    Logger.verbose('TestRunner', `Timestamp after session run: ${end}`);

    // download each output tensor if needed
    for (const name in outputs) {
      if (Object.hasOwnProperty.call(outputs, name)) {
        const tensor = outputs[name];
        // Tensor.getData(true) release the underlying resource
        await tensor.getData(true);
      }
    }

    return [start, end, outputs];
  } finally {
    // dispose the GPU tensors in feeds
    for (const name in feeds) {
      if (Object.hasOwnProperty.call(feeds, name)) {
        const tensor = feeds[name];
        tensor.dispose();
      }
    }
  }
}

/**
 * run a single model test case. the inputs/outputs tensors should already been prepared.
*/
export async function runModelTestSet(
    context: ModelTestContext, testCase: Test.ModelTestCase, testName: string): Promise<void> {
  // (the duplicated "Start to run test data" message without the test name was removed; this one is a superset)
  Logger.verbose('TestRunner', `Start to run test data from folder: ${testName}/${testCase.name}`);
  const validator = new TensorResultValidator(context.backend);
  try {
    // inputs/outputs of the test case are matched to the session's input/output names by position
    const feeds: Record<string, ort.Tensor> = {};
    const outputsMetaInfo: Record<string, ort.Tensor> = {};
    testCase.inputs!.forEach((tensor, i) => feeds[context.session.inputNames[i]] = tensor);
    testCase.outputs!.forEach((tensor, i) => outputsMetaInfo[context.session.outputNames[i]] = tensor);
    const [start, end, outputs] =
        await sessionRun({session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding});

    // the first run is recorded separately from the following runs
    if (context.perfData.count === 0) {
      context.perfData.firstRun = end - start;
    } else {
      context.perfData.runs.push(end - start);
    }
    context.perfData.count++;

    Logger.verbose('TestRunner', `Finished running model from file: ${testCase.name}`);
    Logger.verbose('TestRunner', ' Stats:');
    Logger.verbose('TestRunner', `  Input(s): ${testCase.inputs!.length}`);
    testCase.inputs!.forEach(i => {
      Logger.verbose('TestRunner', `   '${i.name}': ${i.type}[${i.dims.join(',')}]`);
    });
    Logger.verbose('TestRunner', `  Output(s): ${Object.keys(outputs).length}`);
    for (const name in outputs) {
      if (Object.hasOwnProperty.call(outputs, name)) {
        const tensor = outputs[name];
        Logger.verbose('TestRunner', `   '${name}': ${tensor.type}[${tensor.dims.join(',')}]`);
      }
    }

    validator.checkNamedTensorResult(outputs, testCase.outputs!);

    Logger.verbose('TestRunner', '  Result: PASS');
  } catch (e) {
    Logger.error('TestRunner', '  Result: FAILED');
    Logger.error('TestRunner', `Failed to run test data from folder: ${testCase.name}. Error: ${inspect(e)}`);
    throw e;
  }
}

/**
 * create an operator by building a mock graph that contains a single node of the given op type and attributes,
 * and resolving that node with the session handler.
 */
function initializeOperator(
    sessionHandler: SessionHandler, opType: string, attributeValues: readonly Test.AttributeValue[],
    opsetImports: readonly Test.OperatorTestOpsetImport[]): Operator {
  const attributes = new Attribute(undefined);
  attributeValues.forEach(value => attributes.set(value.name, value.type, value.data));
  const graph = createMockGraph(opType, attributes);
  return sessionHandler.resolve(graph.getNodes()[0], opsetImports, graph);
}

/**
 * a OpTestContext object contains all states in a OpTest. used for webgl backend.
 */
export class OpTestContext {
  static profiler = Profiler.create();

  readonly backendHint: string;
  // the 2 handlers below are assigned in init()
  sessionHandler: SessionHandler;
  inferenceHandler: InferenceHandler;

  constructor(protected opTest: Test.OperatorTest) {
    this.backendHint = opTest.backend ?? 'cpu';
  }

  /**
   * create the operator under test. falls back to the default domain with opset version 7 if the test does not
   * specify an opset.
   */
  createOperator(): Operator {
    return initializeOperator(
        this.sessionHandler, this.opTest.operator, this.opTest.attributes || [],
        [this.opTest.opset ?? {domain: '', version: 7}]);
  }

  /**
   * dispose the inference handler and the session handler.
   */
  async dispose(): Promise<void> {
    this.inferenceHandler.dispose();
    this.sessionHandler.dispose();
  }

  /**
   * resolve the backend and create the session handler and the inference handler.
   */
  async init(): Promise<void> {
    const backend = await resolveBackend(this.backendHint);
    this.sessionHandler = backend.createSessionHandler({profiler: OpTestContext.profiler});
    this.inferenceHandler = this.sessionHandler.createInferenceHandler();
  }
}

/**
 * a ProtoOpTestContext uses a protobuf model for operator test. used for ORT based backend.
*/
export class ProtoOpTestContext {
  private readonly loadedData: Uint8Array;  // model data, inputs, outputs
  session: ort.InferenceSession;
  readonly backendHint: string;
  readonly ioBindingMode: Test.IOBindingMode;

  /**
   * build a single-node ONNX model for the operator test and keep its serialized bytes.
   *
   * @param test - the operator test. all test cases must have the same number of inputs and outputs.
   * @param sessionOptions - extra session options, applied in init().
   * @throws Error if there is no test case, if the test cases are inconsistent with each other or with
   *     test.inputShapeDefinitions, or if an attribute type is not supported.
   */
  constructor(test: Test.OperatorTest, private readonly sessionOptions: ort.InferenceSession.SessionOptions = {}) {
    const opsetImport = onnx.OperatorSetIdProto.create(test.opset);
    const operator = test.operator;
    const attribute = (test.attributes || []).map(attr => {
      const protoAttr = onnx.AttributeProto.create({name: attr.name});
      switch (attr.type) {
        case 'float':
          protoAttr.type = onnx.AttributeProto.AttributeType.FLOAT;
          protoAttr.f = attr.data as number;
          break;
        case 'int':
          protoAttr.type = onnx.AttributeProto.AttributeType.INT;
          protoAttr.i = attr.data as number;
          break;
        case 'string':
          protoAttr.type = onnx.AttributeProto.AttributeType.STRING;
          protoAttr.s = new TextEncoder().encode(attr.data as string);
          break;
        case 'floats':
          protoAttr.type = onnx.AttributeProto.AttributeType.FLOATS;
          protoAttr.floats = attr.data as number[];
          break;
        case 'ints':
          protoAttr.type = onnx.AttributeProto.AttributeType.INTS;
          protoAttr.ints = attr.data as number[];
          break;
        case 'strings':
          protoAttr.type = onnx.AttributeProto.AttributeType.STRINGS;
          protoAttr.strings = (attr.data as string[]).map(s => new TextEncoder().encode(s));
          break;
        default:
          throw new Error(`Unsupported attribute type: ${attr.type}`);
      }
      return protoAttr;
    });

    if (test.cases.length === 0) {
      throw new Error(`No test cases found for test: ${test.name} [${test.operator}]`);
    }
    const inputCount = test.cases[0].inputs!.length;
    const outputCount = test.cases[0].outputs!.length;
    if (test.cases.some(
            testCase => testCase.inputs!.length !== inputCount || testCase.outputs!.length !== outputCount)) {
      throw new Error(
          `Test cases for test: ${test.name} [${test.operator}] must have the same number of inputs and outputs`);
    }

    const model = onnx.ModelProto.create();
    model.irVersion = onnx.Version.IR_VERSION;
    model.opsetImport.push(opsetImport);
    model.graph = onnx.GraphProto.create();

    model.graph.node = [onnx.NodeProto.create({
      input: test.cases[0].inputs!.map((_, i) => `input_${i}`),
      output: test.cases[0].outputs!.map((_, i) => `output_${i}`),
      opType: operator,
      domain: test.opset?.domain,
      name: operator,
      attribute
    })];

    // normalize input shape definitions
    // one entry per input: undefined (no shape), or a list of dims where a string is a symbolic dim name and a
    // number is a fixed dim value.
    let normalizedInputShapeDefinitions: ReadonlyArray<ReadonlyArray<number|string>|undefined>;
    if (!test.inputShapeDefinitions || test.inputShapeDefinitions === 'none') {
      // if inputShapeDefinitions is not specified, use undefined for all inputs
      normalizedInputShapeDefinitions = new Array(inputCount).fill(undefined);
    } else if (test.inputShapeDefinitions === 'rankOnly') {
      // check if all test cases have data
      if (test.cases.some(testCase => testCase.inputs!.some(input => !input.data || !input.dims))) {
        throw new Error(`Test cases for test: ${test.name} [${
            test.operator}] must have data for each inputs when inputShapeDefinitions is 'rankOnly'`);
      }

      // if inputShapeDefinitions is 'rankOnly', use semantic names for all inputs. This means only rank is specified.
      normalizedInputShapeDefinitions =
          test.cases[0].inputs!.map((input: Test.TensorValue, i) => input.dims.map((_, j) => `_input_${i}_d${j}`));

      // check if all test cases have the same rank for each inputs
      if (test.cases.some(
              testCase => testCase.inputs!.some(
                  (input: Test.TensorValue, i) =>
                      input.dims.length !== (test.cases[0].inputs![i] as Test.TensorValue).dims.length))) {
        throw new Error(`Test cases for test: ${test.name} [${
            test.operator}] must have the same rank for each inputs in different test cases`);
      }
    } else if (test.inputShapeDefinitions === 'static') {
      // check if all test cases have data
      if (test.cases.some(testCase => testCase.inputs!.some(input => !input.data || !input.dims))) {
        throw new Error(`Test cases for test: ${test.name} [${
            test.operator}] must have data for each inputs when inputShapeDefinitions is 'static'`);
      }

      // if inputShapeDefinitions is 'static', use the shape of the first test case for all inputs.
      normalizedInputShapeDefinitions = test.cases[0].inputs!.map((input: Test.TensorValue) => input.dims);

      // check if all test cases have the same shape for each inputs.
      // the test must fail when a shape DIFFERS from the first test case, hence the negation.
      if (test.cases.some(
              testCase => testCase.inputs!.some(
                  (input: Test.TensorValue, i) => !TensorResultValidator.integerEqual(
                      input.dims, (test.cases[0].inputs![i] as Test.TensorValue).dims)))) {
        throw new Error(`Test cases for test: ${test.name} [${
            test.operator}] must have the same shape for each inputs in different test cases`);
      }
    } else {
      // if inputShapeDefinitions is specified as an array, use it as is.
      // check if inputShapeDefinitions has the same number of inputs as test cases
      if (test.inputShapeDefinitions && test.inputShapeDefinitions.length !== inputCount) {
        throw new Error(
            `Input shape definitions for test: ${test.name} [${test.operator}] must have the same number of inputs`);
      }
      normalizedInputShapeDefinitions = test.inputShapeDefinitions;
    }

    model.graph.input = test.cases[0].inputs!.map((input, i) => {
      const shapeDefinition = normalizedInputShapeDefinitions[i];
      const shape = shapeDefinition ? onnx.TensorShapeProto.create({
        dim: shapeDefinition.map(
            dim => onnx.TensorShapeProto.Dimension.create(typeof dim === 'string' ? {dimParam: dim} : {dimValue: dim}))
      }) :
                                      undefined;
      return onnx.ValueInfoProto.create({
        name: `input_${i}`,
        type: onnx.TypeProto.create({
          tensorType: onnx.TypeProto.Tensor.create({elemType: tensorDataTypeStringToEnum(input.type), shape}),
        }),
      });
    });

    model.graph.output = test.cases[0].outputs!.map((output, i) => onnx.ValueInfoProto.create({
      name: `output_${i}`,
      type: onnx.TypeProto.create({
        tensorType: onnx.TypeProto.Tensor.create({elemType: tensorDataTypeStringToEnum(output.type)}),
      }),
    }));

    model.graph.name = test.name;

    this.backendHint = test.backend!;
    this.ioBindingMode = test.ioBinding;
    this.loadedData = onnx.ModelProto.encode(model).finish().slice();

    // in debug mode, open a new tab in browser for the generated onnx model.
    if (ort.env.debug) {
      const modelFile =
          new File([this.loadedData], `op_test_generated_model_${test.name}.onnx`, {type: 'application/octet-stream'});
      const modelTempUrl = URL.createObjectURL(modelFile);
      const a = document.createElement('a');
      a.href = modelTempUrl;
      a.download = modelFile.name;
      a.target = '_blank';
      a.click();
      URL.revokeObjectURL(modelTempUrl);
    }
  }

  /**
   * create the inference session from the generated model. fields of sessionOptions override the defaults set here.
   */
  async init(): Promise<void> {
    this.session = await ort.InferenceSession.create(this.loadedData, {
      executionProviders: [this.backendHint],
      preferredOutputLocation: this.ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
      ...this.sessionOptions
    });
  }

  /**
   * release the inference session.
   */
  async dispose(): Promise<void> {
    await this.session.release();
  }
}

/**
 * run one operator test case on a session created by ProtoOpTestContext and validate the outputs.
 *
 * Inputs/outputs without data are skipped. 'uint64' / 'int64' data is converted to BigUint64Array / BigInt64Array.
 */
async function runProtoOpTestcase(
    session: ort.InferenceSession, testCase: Test.OperatorTestCase, ioBindingMode: Test.IOBindingMode,
    validator: TensorResultValidator): Promise<void> {
  const feeds: Record<string, ort.Tensor> = {};
  const fetches: Record<string, Pick<ort.Tensor, 'dims'|'type'>> = {};
  testCase.inputs.forEach((input, i) => {
    if (input.data) {
      let data: number[]|BigUint64Array|BigInt64Array = input.data;
      if (input.type === 'uint64') {
        data = BigUint64Array.from(input.data.map(BigInt));
      } else if (input.type === 'int64') {
        data = BigInt64Array.from(input.data.map(BigInt));
      }
      feeds[`input_${i}`] = new ort.Tensor(input.type, data, input.dims);
    }
  });

  // outputs[k] is the expected tensor for expectedOutputNames[k]
  const outputs: ort.Tensor[] = [];
  const expectedOutputNames: string[] = [];
  testCase.outputs.forEach((output, i) => {
    if (output.data) {
      let data: number[]|BigUint64Array|BigInt64Array = output.data;
      if (output.type === 'uint64') {
        data = BigUint64Array.from(output.data.map(BigInt));
      } else if (output.type === 'int64') {
        data = BigInt64Array.from(output.data.map(BigInt));
      }
      outputs.push(new ort.Tensor(output.type, data, output.dims));
      expectedOutputNames.push(`output_${i}`);
      fetches[`output_${i}`] = {dims: output.dims, type: output.type};
    }
  });

  const [, , results] = await sessionRun({session, feeds, outputsMetaInfo: fetches, ioBinding: ioBindingMode});

  const actualOutputNames = Object.getOwnPropertyNames(results);
  expect(actualOutputNames.length).to.equal(expectedOutputNames.length);
  expect(actualOutputNames).to.have.members(expectedOutputNames);

  // look the results up by expected name, so that each actual tensor is compared with its own expected tensor
  // regardless of the property order of the result object.
  const actualOutputs = expectedOutputNames.map(name => results[name]);
  validator.checkApiTensorResult(actualOutputs, outputs);
}

/**
 * create an internal tensor of the given dims and type, and copy the given values into it.
 */
function createTensor(dims: number[], type: Tensor.DataType, data: number[]): Tensor {
  const tensor = new Tensor(dims, type);
  for (let i = 0; i < data.length; ++i) {
    tensor.data[i] = data[i];
  }
  return tensor;
}

/**
 * run one operator test case directly on an operator implementation and validate the outputs.
 */
async function runOpTestcase(
    inferenceHandler: InferenceHandler, operator: Operator, testcase: Test.OperatorTestCase,
    validator: TensorResultValidator): Promise<void> {
  testcase.inputs.forEach((input: Test.TensorValue, i) => {
    Logger.verbose('TestOpRunner', `   Input '${i}': ${input.type}[${input.dims.join(',')}]`);
  });
  const inputTensors = testcase.inputs.map(
      (input: Test.TensorValue) => createTensor(input.dims, input.type as Tensor.DataType, input.data));

  const results = operator.impl(inferenceHandler, inputTensors, operator.context);

  // try async data read. failures are ignored on purpose (best effort).
  for (const result of results) {
    try {
      await result.getData();
    } catch {
    }
  }

  results.forEach((output, i) => {
    Logger.verbose('TestOpRunner', `  Result'${i}': ${output.type}[${output.dims.join(',')}]`);
  });
  const expectedTensors = testcase.outputs.map(
      (output: Test.TensorValue) => createTensor(output.dims, output.type as Tensor.DataType, output.data));
  validator.checkTensorResult(results, expectedTensors);
}

/**
 * run a single operator test case.
 */
export async function runOpTest(
    testcase: Test.OperatorTestCase, context: ProtoOpTestContext|OpTestContext): Promise<void> {
  if (context instanceof ProtoOpTestContext) {
    await runProtoOpTestcase(
        context.session, testcase, context.ioBindingMode, new TensorResultValidator(context.backendHint));
  } else {
    await runOpTestcase(
        context.inferenceHandler, context.createOperator(), testcase, new TensorResultValidator(context.backendHint));
  }
}