From 50806a7dd515a5378043117eb462362e773f6a8b Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Fri, 2 Feb 2024 09:05:57 -0800 Subject: [PATCH] [js/web] support external data in npm test (#19377) ### Description support external data in npm test. This allows test runner to detect whether an external data is available in the test folder, and if it is, load it as external data automatically. this feature does not parse every model to figure out whether the model has external data. the following comments in code explained how to determine whether should parse the model file. ```js // for performance consideration, we do not parse every model. when we think it's likely to have external // data, we will parse it. We think it's "likely" when one of the following conditions is met: // 1. any file in the same folder has the similar file name as the model file // (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data") // 2. the file size is larger than 1GB ``` --- js/web/script/test-runner-cli.ts | 49 +++++++++++++++++++++++++++++++- js/web/test/test-runner.ts | 11 +++---- js/web/test/test-types.ts | 1 + 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/js/web/script/test-runner-cli.ts b/js/web/script/test-runner-cli.ts index d56792c6e3..9105c02412 100644 --- a/js/web/script/test-runner-cli.ts +++ b/js/web/script/test-runner-cli.ts @@ -12,6 +12,7 @@ import * as os from 'os'; import * as path from 'path'; import {inspect} from 'util'; +import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx'; import {bufferToBase64} from '../test/test-shared'; import {Test} from '../test/test-types'; @@ -264,10 +265,12 @@ async function main() { let modelUrl: string|null = null; let cases: Test.ModelTestCase[] = []; + let externalData: Array<{data: string; path: string}>|undefined; npmlog.verbose('TestRunnerCli.Init.Model', `Start to prepare test data from folder: ${testDataRootFolder}`); try { + const maybeExternalDataFiles: Array<[fileNameWithoutExtension: string, size: number]> = []; for (const thisPath of fs.readdirSync(testDataRootFolder)) { const thisFullPath = path.join(testDataRootFolder, thisPath); const stat = fs.lstatSync(thisFullPath); @@ -282,6 +285,8 @@ async function main() { } else { throw new Error('there are multiple model files under the folder specified'); } + } else { + maybeExternalDataFiles.push([path.parse(thisPath).name, stat.size]); } } else if (stat.isDirectory()) { const dataFiles: string[] = []; @@ -307,6 +312,34 @@ async function main() { if (modelUrl === null) { throw new Error('there are no model file under the folder specified'); } + // for performance consideration, we do not parse every model. when we think it's likely to have external + // data, we will parse it. We think it's "likely" when one of the following conditions is met: + // 1. any file in the same folder has the similar file name as the model file + // (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data") + // 2. the file size is larger than 1GB + const likelyToHaveExternalData = maybeExternalDataFiles.some( + ([fileNameWithoutExtension, size]) => + path.basename(modelUrl!).startsWith(fileNameWithoutExtension) || size >= 1 * 1024 * 1024 * 1024); + if (likelyToHaveExternalData) { + const model = onnx.ModelProto.decode(fs.readFileSync(path.join(testDataRootFolder, path.basename(modelUrl!)))); + const externalDataPathSet = new Set(); + for (const initializer of model.graph!.initializer!) { + if (initializer.externalData) { + for (const data of initializer.externalData) { + if (data.key === 'location') { + externalDataPathSet.add(data.value!); + } + } + } + } + externalData = []; + const externalDataPaths = [...externalDataPathSet]; + for (const dataPath of externalDataPaths) { + const fullPath = path.resolve(testDataRootFolder, dataPath); + const url = path.join(TEST_DATA_BASE, path.relative(TEST_ROOT, fullPath)); + externalData.push({data: url, path: dataPath}); + } + } } catch (e) { npmlog.error('TestRunnerCli.Init.Model', `Failed to prepare test data. Error: ${inspect(e)}`); throw e; @@ -340,9 +373,23 @@ async function main() { npmlog.verbose('TestRunnerCli.Init.Model', ` Model file: ${modelUrl}`); npmlog.verbose('TestRunnerCli.Init.Model', ` Backend: ${backend}`); npmlog.verbose('TestRunnerCli.Init.Model', ` Test set(s): ${cases.length} (${caseCount})`); + if (externalData) { + npmlog.verbose('TestRunnerCli.Init.Model', ` External data: ${externalData.length}`); + for (const data of externalData) { + npmlog.verbose('TestRunnerCli.Init.Model', ` - ${data.path}`); + } + } npmlog.verbose('TestRunnerCli.Init.Model', '==============================================================='); - return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases, ioBinding}; + return { + name: path.basename(testDataRootFolder), + platformCondition, + modelUrl, + backend, + cases, + ioBinding, + externalData + }; } function tryLocateModelTestFolder(searchPattern: string): string { diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts index 442cb1bcf1..b01d474788 100644 --- a/js/web/test/test-runner.ts +++ b/js/web/test/test-runner.ts @@ -138,8 +138,8 @@ async function loadTensors( async function initializeSession( modelFilePath: string, backendHint: ort.InferenceSession.ExecutionProviderConfig, ioBindingMode: Test.IOBindingMode, - profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions, - fileCache?: FileCacheBuffer): Promise { + profile: boolean, externalData: ort.InferenceSession.SessionOptions['externalData'], + sessionOptions: ort.InferenceSession.SessionOptions, fileCache?: FileCacheBuffer): Promise { const preloadModelData: Uint8Array|undefined = fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined; Logger.verbose( @@ -153,7 +153,8 @@ async function initializeSession( executionProviders: [backendHint], profiler: profilerConfig, enableProfiling: profile, - preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined + preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined, + externalData }; let session: ort.InferenceSession; @@ -246,8 +247,8 @@ export class ModelTestContext { const executionProviderConfig = modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!; const session = await initializeSession( - modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, testOptions?.sessionOptions || {}, - this.cache); + modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData, + testOptions?.sessionOptions || {}, this.cache); const initEnd = now(); diff --git a/js/web/test/test-types.ts b/js/web/test/test-types.ts index cd008e82e5..14b9fd7c00 100644 --- a/js/web/test/test-types.ts +++ b/js/web/test/test-types.ts @@ -65,6 +65,7 @@ export declare namespace Test { export interface ModelTest { name: string; modelUrl: string; + externalData?: InferenceSession.SessionOptions['externalData']; backend?: string; // value should be populated at build time ioBinding: IOBindingMode; platformCondition?: PlatformCondition;