[js/web] support external data in npm test (#19377)

### Description
Support external data in `npm test`.

This allows the test runner to detect whether external data is available
in the test folder and, if it is, to load it automatically when the model
is loaded.

This feature does not parse every model to figure out whether the model
has external data. The following code comment explains how the test runner
decides whether the model file should be parsed.

```js
      // for performance consideration, we do not parse every model. when we think it's likely to have external
      // data, we will parse it. We think it's "likely" when one of the following conditions is met:
      // 1. any file in the same folder has the similar file name as the model file
      //    (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
      // 2. the file size is larger than 1GB
```
This commit is contained in:
Yulong Wang 2024-02-02 09:05:57 -08:00 committed by GitHub
parent efc17e79de
commit 50806a7dd5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 55 additions and 6 deletions

View file

@ -12,6 +12,7 @@ import * as os from 'os';
import * as path from 'path';
import {inspect} from 'util';
import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
import {bufferToBase64} from '../test/test-shared';
import {Test} from '../test/test-types';
@ -264,10 +265,12 @@ async function main() {
let modelUrl: string|null = null;
let cases: Test.ModelTestCase[] = [];
let externalData: Array<{data: string; path: string}>|undefined;
npmlog.verbose('TestRunnerCli.Init.Model', `Start to prepare test data from folder: ${testDataRootFolder}`);
try {
const maybeExternalDataFiles: Array<[fileNameWithoutExtension: string, size: number]> = [];
for (const thisPath of fs.readdirSync(testDataRootFolder)) {
const thisFullPath = path.join(testDataRootFolder, thisPath);
const stat = fs.lstatSync(thisFullPath);
@ -282,6 +285,8 @@ async function main() {
} else {
throw new Error('there are multiple model files under the folder specified');
}
} else {
maybeExternalDataFiles.push([path.parse(thisPath).name, stat.size]);
}
} else if (stat.isDirectory()) {
const dataFiles: string[] = [];
@ -307,6 +312,34 @@ async function main() {
if (modelUrl === null) {
throw new Error('there are no model file under the folder specified');
}
// for performance consideration, we do not parse every model. when we think it's likely to have external
// data, we will parse it. We think it's "likely" when one of the following conditions is met:
// 1. any file in the same folder has the similar file name as the model file
// (e.g., model file is "model_abc.onnx", and there is a file "model_abc.pb" or "model_abc.onnx.data")
// 2. the file size is larger than 1GB
const likelyToHaveExternalData = maybeExternalDataFiles.some(
([fileNameWithoutExtension, size]) =>
path.basename(modelUrl!).startsWith(fileNameWithoutExtension) || size >= 1 * 1024 * 1024 * 1024);
if (likelyToHaveExternalData) {
const model = onnx.ModelProto.decode(fs.readFileSync(path.join(testDataRootFolder, path.basename(modelUrl!))));
const externalDataPathSet = new Set<string>();
for (const initializer of model.graph!.initializer!) {
if (initializer.externalData) {
for (const data of initializer.externalData) {
if (data.key === 'location') {
externalDataPathSet.add(data.value!);
}
}
}
}
externalData = [];
const externalDataPaths = [...externalDataPathSet];
for (const dataPath of externalDataPaths) {
const fullPath = path.resolve(testDataRootFolder, dataPath);
const url = path.join(TEST_DATA_BASE, path.relative(TEST_ROOT, fullPath));
externalData.push({data: url, path: dataPath});
}
}
} catch (e) {
npmlog.error('TestRunnerCli.Init.Model', `Failed to prepare test data. Error: ${inspect(e)}`);
throw e;
@ -340,9 +373,23 @@ async function main() {
npmlog.verbose('TestRunnerCli.Init.Model', ` Model file: ${modelUrl}`);
npmlog.verbose('TestRunnerCli.Init.Model', ` Backend: ${backend}`);
npmlog.verbose('TestRunnerCli.Init.Model', ` Test set(s): ${cases.length} (${caseCount})`);
if (externalData) {
npmlog.verbose('TestRunnerCli.Init.Model', ` External data: ${externalData.length}`);
for (const data of externalData) {
npmlog.verbose('TestRunnerCli.Init.Model', ` - ${data.path}`);
}
}
npmlog.verbose('TestRunnerCli.Init.Model', '===============================================================');
return {name: path.basename(testDataRootFolder), platformCondition, modelUrl, backend, cases, ioBinding};
return {
name: path.basename(testDataRootFolder),
platformCondition,
modelUrl,
backend,
cases,
ioBinding,
externalData
};
}
function tryLocateModelTestFolder(searchPattern: string): string {

View file

@ -138,8 +138,8 @@ async function loadTensors(
async function initializeSession(
modelFilePath: string, backendHint: ort.InferenceSession.ExecutionProviderConfig, ioBindingMode: Test.IOBindingMode,
profile: boolean, sessionOptions: ort.InferenceSession.SessionOptions,
fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
profile: boolean, externalData: ort.InferenceSession.SessionOptions['externalData'],
sessionOptions: ort.InferenceSession.SessionOptions, fileCache?: FileCacheBuffer): Promise<ort.InferenceSession> {
const preloadModelData: Uint8Array|undefined =
fileCache && fileCache[modelFilePath] ? fileCache[modelFilePath] : undefined;
Logger.verbose(
@ -153,7 +153,8 @@ async function initializeSession(
executionProviders: [backendHint],
profiler: profilerConfig,
enableProfiling: profile,
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined
preferredOutputLocation: ioBindingMode === 'gpu-location' ? ('gpu-buffer' as const) : undefined,
externalData
};
let session: ort.InferenceSession;
@ -246,8 +247,8 @@ export class ModelTestContext {
const executionProviderConfig =
modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
const session = await initializeSession(
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, testOptions?.sessionOptions || {},
this.cache);
modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData,
testOptions?.sessionOptions || {}, this.cache);
const initEnd = now();

View file

@ -65,6 +65,7 @@ export declare namespace Test {
export interface ModelTest {
name: string;
modelUrl: string;
externalData?: InferenceSession.SessionOptions['externalData'];
backend?: string; // value should be populated at build time
ioBinding: IOBindingMode;
platformCondition?: PlatformCondition;