mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
### Description This is a narrow implementation of Attention/MultiHeadAttention: it does not support (a) inputs 5-7 for MHA, (b) packed QKV/KV, (c) past/present, or (d) attention mask. It nevertheless works well for StableDiffusion and can be extended later. It reduces VRAM usage because it combines many ops into a few. I've updated the demo at https://islamov.ai/stable-diffusion-webgpu/ — it takes ~13 s for 1 image with 20 steps on an RTX 3090 Ti and about 25 s on an M1 Pro. VRAM usage is about 8 GB if you don't use img2img. Going to focus on SDXL now. --------- Co-authored-by: Guenther Schmuelling <guschmue@microsoft.com> Co-authored-by: Yulong Wang <7679871+fs-eire@users.noreply.github.com>
109 lines
4.6 KiB
TypeScript
109 lines
4.6 KiB
TypeScript
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
import fs from 'fs';
|
|
import {EOL} from 'os';
|
|
import path from 'path';
|
|
|
|
/**
 * Free-form remarks per operator, keyed by the operator name captured from the kernel
 * registration macros. The remark is copied into the "Comments" column of the generated table;
 * operators that have no entry here get an empty cell.
 */
const COMMENTS: Record<string, string> = {
  'AveragePool': 'need perf optimization; need implementing activation',
  'MaxPool': 'need perf optimization; need implementing activation',
  'Conv': 'need perf optimization; conv3d is not supported; need implementing activation',
  'ConvTranspose': 'need perf optimization; ConvTranspose3d is not supported; need implementing activation',
  'Transpose': 'need perf optimization',
  'Reshape': 'no GPU kernel',
  'Shape': 'no GPU kernel; an ORT warning is generated - need to fix',
  'Resize': 'CoordinateTransformMode align_corners is not supported with downsampling',
  'Attention': 'need implementing mask and past/present',
  'MultiHeadAttention': 'need implementing mask and past/present',
};
|
|
|
|
/* eslint-disable max-len */
/**
 * One global regex per kernel-registration macro form. Each captures the named groups `ep`,
 * `opsetDomain` and `op`, plus either `opsetVersion` (open-ended registration) or
 * `opsetVersionStart`/`opsetVersionEnd` (versioned registration). The typed forms also capture
 * `type`. The `g` flag is required because the patterns are used with `String.prototype.matchAll`.
 */
const MATCHERS: readonly RegExp[] = [
  /class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME\(\s*(?<ep>\w+),\s*(?<opsetDomain>\w+),\s*(?<opsetVersionStart>\d+),\s*(?<opsetVersionEnd>\d+),\s*(?<op>\w+)\)/g,
  /class ONNX_OPERATOR_KERNEL_CLASS_NAME\(\s*(?<ep>\w+),\s*(?<opsetDomain>\w+),\s*(?<opsetVersion>\d+),\s*(?<op>\w+)\)/g,
  /class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME\(\s*(?<ep>\w+),\s*(?<opsetDomain>\w+),\s*(?<opsetVersionStart>\d+),\s*(?<opsetVersionEnd>\d+),\s*(?<type>\w+),\s*(?<op>\w+)\)/g,
  /class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME\(\s*(?<ep>\w+),\s*(?<opsetDomain>\w+),\s*(?<opsetVersion>\d+),\s*(?<type>\w+),\s*(?<op>\w+)\)/g,
];
/* eslint-enable max-len */
|
|
|
|
/**
 * Everything collected about each registered operator, keyed by operator name.
 *
 * - `opset`: domain name -> list of `[startVersion, endVersion]` pairs; `endVersion` is
 *   `undefined` for an open-ended registration.
 * - `comments`: the remark looked up in `COMMENTS`, or `undefined` when the operator has none.
 */
const ALL_REGISTERED_OPERATORS: Map<string, {
  opset: Map<string, Array<[number, number | undefined]>>;
  comments: string | undefined;
}> = new Map();
|
|
|
|
// Source text to scan for kernel registrations: js_execution_provider.cc followed by
// js_contrib_kernels.cc, both resolved relative to this script and read synchronously as UTF-8.
const JS_EXECUTION_PROVIDER_CONTENTS =
    fs.readFileSync(path.join(__dirname, '../../../onnxruntime/core/providers/js/js_execution_provider.cc'), 'utf8') +
    fs.readFileSync(path.join(__dirname, '../../../onnxruntime/contrib_ops/js/js_contrib_kernels.cc'), 'utf8');
|
|
// Domain constants accepted in kernel registrations, mapped to the domain label used in the table.
const OPSET_DOMAIN_LABELS = new Map<string, string>([
  ['kOnnxDomain', 'ai.onnx'],
  ['kMSInternalNHWCDomain', 'com.ms.internal.nhwc'],
  ['kMSDomain', 'com.microsoft'],
]);

// Scan the registration sources with every macro pattern and record, per operator and per domain,
// each registered opset version range. Throws on a registration for another execution provider or
// for a domain that is not listed above.
MATCHERS.forEach(m => {
  for (const match of JS_EXECUTION_PROVIDER_CONTENTS.matchAll(m)) {
    // every pattern in MATCHERS declares named groups, so `groups` is always populated
    const groups = match.groups!;
    const {ep, opsetDomain, opsetVersion, opsetVersionStart, opsetVersionEnd, op} = groups;

    if (ep !== 'kJsExecutionProvider') {
      throw new Error(`invalid EP registration for EP name: ${ep}`);
    }
    const domain = OPSET_DOMAIN_LABELS.get(opsetDomain);
    if (domain === undefined) {
      throw new Error(`not supported domain: ${opsetDomain}`);
    }

    // first sighting of an operator creates its record and picks up its optional remark
    let opInfo = ALL_REGISTERED_OPERATORS.get(op);
    if (!opInfo) {
      opInfo = {opset: new Map(), comments: COMMENTS[op]};
      ALL_REGISTERED_OPERATORS.set(op, opInfo);
    }
    const {opset} = opInfo;
    let currentDomainInfo = opset.get(domain);
    if (!currentDomainInfo) {
      currentDomainInfo = [];
      opset.set(domain, currentDomainInfo);
    }

    // `opsetVersion` is only captured by the non-versioned macros: those are open-ended ("N+")
    if (opsetVersion) {
      currentDomainInfo.push([parseInt(opsetVersion, 10), undefined]);
    } else {
      currentDomainInfo.push([parseInt(opsetVersionStart, 10), parseInt(opsetVersionEnd, 10)]);
    }
  }
});

// Order each domain's ranges by start version. Sorting once here gives the same result as
// re-sorting after every insertion, because Array.prototype.sort is stable.
for (const {opset} of ALL_REGISTERED_OPERATORS.values()) {
  for (const versions of opset.values()) {
    versions.sort((a, b) => a[0] - b[0]);
  }
}
|
|
|
|
/**
 * Formats one registered version range for the table: `N+` when the range is open-ended,
 * `N` when start and end are equal, otherwise `N-M`.
 */
const formatOpsetRange = ([start, end]: [number, number | undefined]): string => {
  if (end === undefined) {
    return `${start}+`;
  }
  return start === end ? `${start}` : `${start}-${end}`;
};

// Write the markdown document: title, explanatory paragraph, "generated file" notice, table
// header, then one row per operator in sorted name order.
const doc = fs.createWriteStream(path.join(__dirname, '../docs/webgpu-operators.md'));
doc.write(`## Operators Support Table${EOL}${EOL}`);
// NOTE: the continuation lines of the two template literals below must stay at column 0;
// any indentation would become part of the generated text.
doc.write(`The following table shows ONNX
operators and the supported opset domain/versions in WebGPU EP by ONNX Runtime Web. For example,
\`4-6, 8+\` means ONNX Runtime Web currently support opset version 4 to 6, 8 and above.${EOL}${EOL}`);
doc.write(`*This file is automatically generated from the
def files via [this script](../script/generate-webgpu-operator-md.ts).
Do not modify directly.*${EOL}${EOL}`);
doc.write(`| Operator | Opset | Comments |${EOL}`);
doc.write(`|:--------:|:-------------:|-----|${EOL}`);

for (const op of Array.from(ALL_REGISTERED_OPERATORS.keys()).sort()) {
  const {opset, comments} = ALL_REGISTERED_OPERATORS.get(op)!;
  // domains in sorted order, each rendered as `domain(range,range,...)`; the Set drops ranges
  // that format to the same text
  const opsetString =
      Array.from(opset.keys())
          .sort()
          .map(domain => `${domain}(${[...new Set(opset.get(domain)!.map(formatOpsetRange))].join(',')})`)
          .join('; ');
  doc.write(`| ${op} | ${opsetString} | ${comments ?? ''} |${EOL}`);
}
doc.end();
|