onnxruntime/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts
Yulong Wang abdc31de40
[js] change default formatter for JavaScript/TypeScript from clang-format to Prettier (#21728)
### Description

See
454996d496
for manual changes (excluded auto-generated formatting changes)

### Why

Because the toolset for the old clang-format is out of date, which reduces
development efficiency.

- The NPM package `clang-format` is already in maintenance mode and has not
been updated in 2 years.
- The VSCode extension for clang-format has not been maintained for a while,
and a recent Node.js security update made it stop working entirely on
Windows.

No one in the community seems interested in fixing those.

Choose Prettier as it is the most popular TS/JS formatter.

### How to merge

It's easy to break the build:
- Be careful of any new commits on main not included in this PR.
- Be careful that after this PR is merged, other PRs that already passed
CI can merge.

So, make sure there are no new commits before merging this one, and
invalidate JS PRs that already passed CI, forcing them to merge the latest main first.
2024-08-14 16:51:22 -07:00

487 lines
16 KiB
TypeScript

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
import { WebGpuBackend } from '../backend-webgpu';
import { LOG_DEBUG } from '../log';
import { GpuData, GpuDataId, GpuDataType } from './types';
/**
* Manages the mapping GpuDataId -> GPU buffer, together with the transfers into, out of and between those buffers.
*/
export interface GpuDataManager {
/**
* copy data from CPU to GPU.
*
* @param id - ID of the destination GPU data.
* @param data - the bytes to upload.
*/
upload(id: GpuDataId, data: Uint8Array): void;
/**
* copy data from GPU to GPU.
*
* @param sourceId - ID of the GPU data to copy from.
* @param destinationId - ID of the GPU data to copy to.
*/
memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void;
/**
* create new data on GPU.
*
* @param size - requested size of the data.
* @param usage - optional GPU buffer usage flags.
* @return the newly created GPU data.
*/
create(size: number, usage?: number): GpuData;
/**
* get GPU data by ID.
*
* @return the GPU data, or undefined if the ID is not known.
*/
get(id: GpuDataId): GpuData | undefined;
/**
* release the data on GPU by ID.
*
* @return size of the data released
*/
release(id: GpuDataId): number;
/**
* copy data from GPU to CPU.
*
* @param id - ID of the GPU data to read.
* @param getTargetBuffer - callback returning the CPU buffer that receives the data.
*/
download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void>;
/**
* refresh the buffers that are marked for release.
*
* when release() is called, the buffer is not released immediately. this is because we need to wait for the commands
* to be submitted to the GPU. this function is called after the commands are submitted so that the buffers can be
* actually released.
*/
refreshPendingBuffers(): void;
/**
* register an external buffer for IO Binding. If the buffer is already registered, return the existing GPU data ID.
*
* GPU data manager only manages a mapping between the buffer and the GPU data ID. It will not manage the lifecycle of
* the external buffer.
*
* @param buffer - the external GPU buffer to register.
* @param originalSize - the size of the data held by the buffer.
* @param previousBuffer - optional. A buffer registered earlier whose GPU data ID should be reused.
* @return the GPU data ID associated with the buffer.
*/
registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number;
/**
* unregister an external buffer for IO Binding.
*/
unregisterExternalBuffer(buffer: GPUBuffer): void;
/**
* destroy all gpu buffers.
*/
dispose(): void;
/**
* release session related data.
* @param sessionId - specify the session ID.
*/
onReleaseSession(sessionId: number): void;
}
/**
* An entry of the storage cache: a GPU data object together with the size that was requested for it.
*/
interface StorageCacheValue {
// the GPU data (ID, type and underlying GPU buffer)
gpuData: GpuData;
// the size as requested by the caller, i.e. before any bucket or alignment rounding of the buffer
originalSize: number;
}
/**
 * Free-list capacity per bucket: bucket size in bytes -> maximum number of released buffers of that size that are
 * kept around for reuse.
 *
 * The entries are listed in ascending size order. That order matters: the keys are copied into `bucketArr` in
 * iteration order, and the bucket lookup returns the first entry that is large enough.
 */
const bucketFreelist = new Map<number, number>([
  [64, 250],
  [128, 200],
  [256, 200],
  [512, 200],
  [2048, 230],
  [4096, 200],
  [8192, 50],
  [16384, 50],
  [32768, 50],
  [65536, 50],
  [131072, 50],
  [262144, 50],
  [524288, 50],
  [1048576, 50], // 1 MiB
  [2097152, 30],
  [4194304, 20],
  [8388608, 10],
  [12582912, 10],
  [16777216, 10], // 16 MiB
  [26214400, 15],
  [33554432, 22],
  [44236800, 2],
  [58982400, 6],
  // we don't want to cache the bucket sizes below but not caching them
  // results in some major performance hits for models like sd-turbo.
  [67108864, 6], // 64 MiB
  [134217728, 6], // 128 MiB
  [167772160, 6], // 160 MiB
]);

/**
 * The bucket sizes as a plain array. It starts out empty and is filled from the keys of `bucketFreelist` when a
 * GPU data manager is constructed.
 */
const bucketArr: number[] = [];
/**
 * Round a byte size up to the next multiple of 16, i.e. to the 128-bit alignment used for the GPU buffers.
 *
 * @param size - the size in bytes.
 * @returns the smallest multiple of 16 that is not less than `size`.
 */
const calcNormalizedBufferSize = (size: number) => {
  const alignment = 16;
  const alignedUnits = Math.ceil(size / alignment);
  return alignedUnits * alignment;
};
/**
 * Pick the buffer size to allocate for a request of `size` bytes.
 *
 * The first entry of `bucketArr` that can hold `size` wins. When no entry is large enough, the size is only rounded
 * up to a multiple of 16.
 *
 * @param size - the requested size in bytes.
 * @returns the bucket size, or the 16-byte aligned size when no bucket fits.
 */
const calcBucketBufferSize = (size: number) => {
  const fittingBucket = bucketArr.find((bucketSize) => size <= bucketSize);
  // not in bucket list -> caller will not cache, round up to 16.
  return fittingBucket ?? Math.ceil(size / 16) * 16;
};
// The next GPU data ID to hand out. IDs start at 1 and increase by one per call.
let guid = 1;

/**
 * Allocate a fresh GPU data ID.
 *
 * @returns the current value of the counter; the counter is advanced afterwards.
 */
const createNewGpuDataId = (): number => {
  const id = guid;
  guid += 1;
  return id;
};
/**
 * Standard download routine: reads the content of a GPU buffer back to the CPU. Exported so that it can be used by
 * the session to download data from GPU, and by the factory that creates GPU tensors able to download their data.
 *
 * The data is copied into a temporary mappable buffer, the pending commands are flushed, and the temporary buffer is
 * mapped for reading. The temporary buffer is destroyed on every exit path.
 *
 * @param backend - the WebGPU backend
 * @param gpuBuffer - the GPU buffer to download
 * @param originalSize - the original size of the data
 * @param getTargetBuffer - optional. If provided, the data is copied into the buffer it returns and that buffer is
 *     the result. Otherwise a new buffer is created and returned.
 * @returns the CPU buffer holding the first `originalSize` bytes of the GPU buffer.
 */
export const downloadGpuData = async (
  backend: WebGpuBackend,
  gpuBuffer: GPUBuffer,
  originalSize: number,
  getTargetBuffer?: () => Uint8Array,
): Promise<Uint8Array> => {
  const alignedSize = calcNormalizedBufferSize(originalSize);
  const stagingBuffer = backend.device.createBuffer({
    size: alignedSize,
    // eslint-disable-next-line no-bitwise
    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
  });

  try {
    const encoder = backend.getCommandEncoder();
    backend.endComputePass();
    // copy [0, alignedSize) of the source buffer to the start of the staging buffer.
    encoder.copyBufferToBuffer(gpuBuffer, 0, stagingBuffer, 0, alignedSize);
    backend.flush();

    await stagingBuffer.mapAsync(GPUMapMode.READ);
    const mappedRange = stagingBuffer.getMappedRange();

    if (!getTargetBuffer) {
      // the mapped ArrayBuffer will be released when the GPU buffer is destroyed. Need to clone the
      // ArrayBuffer.
      return new Uint8Array(mappedRange.slice(0, originalSize));
    }

    // if we already have a CPU buffer to accept the data, no need to clone the ArrayBuffer.
    const destination = getTargetBuffer();
    destination.set(new Uint8Array(mappedRange, 0, originalSize));
    return destination;
  } finally {
    stagingBuffer.destroy();
  }
};
/**
 * Default implementation of GpuDataManager.
 *
 * It keeps a cache GPU data ID -> GPU buffer, defers the release of buffers until `refreshPendingBuffers()` is
 * called, and recycles released storage/uniform buffers through per-size free lists whose capacity is given by
 * `bucketFreelist`. Buffers registered through `registerExternalBuffer()` are only mapped to an ID; they are owned
 * by the caller.
 */
class GpuDataManagerImpl implements GpuDataManager {
  // GPU Data ID => GPU Data ( storage buffer )
  private storageCache: Map<GpuDataId, StorageCacheValue>;

  // pending buffers for uploading ( data is unmapped )
  private buffersForUploadingPending: GPUBuffer[];

  // pending buffers for computing
  private buffersPending: GPUBuffer[];

  // The reusable storage buffers for computing.
  private freeBuffers: Map<number, GPUBuffer[]>;

  // The reusable uniform buffers
  private freeUniformBuffers: Map<number, GPUBuffer[]>;

  // The external buffers registered users for IO Binding.
  private externalBuffers: Map<GPUBuffer, GpuDataId>;

  // The pendingBuffers for capture graph.
  // a SessionID -> GPUBuffer[] mapping.
  private capturedPendingBuffers: Map<number, GPUBuffer[]>;

  /**
   * @param backend - the WebGPU backend that provides the device and the command encoder.
   */
  constructor(private backend: WebGpuBackend) {
    this.storageCache = new Map();
    this.freeBuffers = new Map();
    this.freeUniformBuffers = new Map();
    this.buffersForUploadingPending = [];
    this.buffersPending = [];
    this.externalBuffers = new Map();
    this.capturedPendingBuffers = new Map();

    for (const [key] of bucketFreelist) {
      // bucketArr is module-level state shared by all manager instances. Only add a key that is not there yet so
      // that constructing several managers does not fill it with duplicates.
      if (!bucketArr.includes(key)) {
        bucketArr.push(key);
      }
      this.freeBuffers.set(key, []);
      this.freeUniformBuffers.set(key, []);
    }
  }

  /**
   * Copy data from CPU to GPU through a temporary buffer that is mapped at creation. The temporary buffer is kept
   * until the next `refreshPendingBuffers()` call.
   *
   * @param id - ID of the destination GPU data.
   * @param data - the bytes to upload. Its length must equal the original size of the GPU data.
   * @throws Error if the ID is unknown or the sizes are inconsistent.
   */
  upload(id: GpuDataId, data: Uint8Array): void {
    const srcArrayBuffer = data.buffer;
    const srcOffset = data.byteOffset;
    const srcLength = data.byteLength;
    const size = calcNormalizedBufferSize(srcLength);

    // get destination gpu buffer
    const gpuDataCache = this.storageCache.get(id);
    if (!gpuDataCache) {
      throw new Error('gpu data for uploading does not exist');
    }
    if (gpuDataCache.originalSize !== srcLength) {
      throw new Error(`inconsistent data size. gpu data size=${gpuDataCache.originalSize}, data size=${srcLength}`);
    }

    // create gpu buffer
    const gpuBufferForUploading = this.backend.device.createBuffer(
      // eslint-disable-next-line no-bitwise
      { mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC },
    );

    // copy (upload) data
    const arrayBuffer = gpuBufferForUploading.getMappedRange();
    new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));
    gpuBufferForUploading.unmap();

    // GPU copy
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuDataCache.gpuData.buffer, 0, size);

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.upload(id=${id})`);

    this.buffersForUploadingPending.push(gpuBufferForUploading);
  }

  /**
   * Copy data from one GPU data to another. Both must have the same original size.
   *
   * @param sourceId - ID of the GPU data to copy from.
   * @param destinationId - ID of the GPU data to copy to.
   * @throws Error if either ID is unknown or the sizes differ.
   */
  memcpy(sourceId: GpuDataId, destinationId: GpuDataId): void {
    // get source gpu buffer
    const sourceGpuDataCache = this.storageCache.get(sourceId);
    if (!sourceGpuDataCache) {
      throw new Error('source gpu data for memcpy does not exist');
    }
    // get destination gpu buffer
    const destinationGpuDataCache = this.storageCache.get(destinationId);
    if (!destinationGpuDataCache) {
      throw new Error('destination gpu data for memcpy does not exist');
    }
    if (sourceGpuDataCache.originalSize !== destinationGpuDataCache.originalSize) {
      throw new Error('inconsistent source and destination gpu data size');
    }

    const size = calcNormalizedBufferSize(sourceGpuDataCache.originalSize);

    // GPU copy
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(
      sourceGpuDataCache.gpuData.buffer,
      0,
      destinationGpuDataCache.gpuData.buffer,
      0,
      size,
    );
  }

  /**
   * Register an external buffer for IO Binding.
   *
   * @param buffer - the external GPU buffer.
   * @param originalSize - the size of the data held by the buffer.
   * @param previousBuffer - optional. A previously registered buffer; its GPU data ID is reused for `buffer`.
   * @returns the GPU data ID associated with `buffer`.
   * @throws Error if `previousBuffer` is not registered, or if a different buffer is registered while the current
   *     session has a captured command list.
   */
  registerExternalBuffer(buffer: GPUBuffer, originalSize: number, previousBuffer?: GPUBuffer): number {
    let id: number | undefined;
    if (previousBuffer) {
      id = this.externalBuffers.get(previousBuffer);
      if (id === undefined) {
        throw new Error('previous buffer is not registered');
      }
      if (buffer === previousBuffer) {
        LOG_DEBUG(
          'verbose',
          () =>
            `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${
              id
            }, buffer is the same, skip.`,
        );
        return id;
      } else if (this.backend.capturedCommandList.has(this.backend.currentSessionId!)) {
        throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
Please use the previous external buffer!`);
      }
      this.externalBuffers.delete(previousBuffer);
    } else {
      id = createNewGpuDataId();
    }

    this.storageCache.set(id, { gpuData: { id, type: GpuDataType.default, buffer }, originalSize });
    this.externalBuffers.set(buffer, id);
    LOG_DEBUG(
      'verbose',
      () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`,
    );
    return id;
  }

  /**
   * Unregister an external buffer for IO Binding. Does nothing if the buffer is not registered.
   */
  unregisterExternalBuffer(buffer: GPUBuffer): void {
    const id = this.externalBuffers.get(buffer);
    if (id !== undefined) {
      this.storageCache.delete(id);
      this.externalBuffers.delete(buffer);
      LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);
    }
  }

  /**
   * Create new data on GPU. Storage and uniform buffers are taken from the matching free list when one is
   * available for the bucket size; otherwise a new GPU buffer is created.
   *
   * @param size - the requested size in bytes.
   * @param usage - GPU buffer usage flags. Defaults to STORAGE | COPY_SRC | COPY_DST.
   * @returns the new GPU data.
   */
  // eslint-disable-next-line no-bitwise
  create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData {
    const bufferSize = calcBucketBufferSize(size);

    // eslint-disable-next-line no-bitwise
    const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
    // eslint-disable-next-line no-bitwise
    const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;

    // Only storage and uniform buffers are reused; storage takes precedence when both flags are set.
    let freelist: GPUBuffer[] | undefined;
    if (isStorage) {
      freelist = this.freeBuffers.get(bufferSize);
    } else if (isUniform) {
      freelist = this.freeUniformBuffers.get(bufferSize);
    }

    // Reuse a cached buffer if there is one. No bucket, or an empty bucket, means a new buffer is created.
    const gpuBuffer = freelist?.pop() ?? this.backend.device.createBuffer({ size: bufferSize, usage });

    const gpuData = { id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer };
    this.storageCache.set(gpuData.id, { gpuData, originalSize: size });

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`);
    return gpuData;
  }

  /**
   * Get GPU data by ID.
   *
   * @returns the GPU data, or undefined if the ID is not in the cache.
   */
  get(id: GpuDataId): GpuData | undefined {
    return this.storageCache.get(id)?.gpuData;
  }

  /**
   * Release the data on GPU by ID. The buffer is not destroyed here: it is queued and handled by the next
   * `refreshPendingBuffers()` call.
   *
   * @returns the original size of the released data.
   * @throws Error if the ID is unknown.
   */
  release(id: GpuDataId): number {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error('releasing data does not exist');
    }

    LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.release(id=${id}), gpuDataId=${cachedData.gpuData.id}`);

    this.storageCache.delete(id);
    this.buffersPending.push(cachedData.gpuData.buffer);

    return cachedData.originalSize;
  }

  /**
   * Copy data from GPU to CPU.
   *
   * @param id - ID of the GPU data to read.
   * @param getTargetBuffer - callback returning the CPU buffer that receives the data.
   * @throws Error if the ID is unknown.
   */
  async download(id: GpuDataId, getTargetBuffer: () => Uint8Array): Promise<void> {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error('data does not exist');
    }
    await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);
  }

  /**
   * Process the buffers queued by `upload()` and `release()`.
   *
   * Upload buffers are always destroyed. Released buffers are recycled or destroyed when the session status is
   * 'default'; otherwise they are parked in the per-session captured list until the session is released.
   */
  refreshPendingBuffers(): void {
    for (const buffer of this.buffersForUploadingPending) {
      // upload buffer is only useful in the session creation time. So we don't need to reuse them in session running.
      buffer.destroy();
    }
    this.buffersForUploadingPending = [];

    if (this.buffersPending.length === 0) {
      return;
    }

    if (this.backend.sessionStatus === 'default') {
      for (const buffer of this.buffersPending) {
        // eslint-disable-next-line no-bitwise
        if ((buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {
          this.recycleOrDestroy(buffer, this.freeBuffers);
          // eslint-disable-next-line no-bitwise
        } else if ((buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM) {
          this.recycleOrDestroy(buffer, this.freeUniformBuffers);
        } else {
          buffer.destroy();
        }
      }
    } else {
      // Don't release intermediate tensors in non-default mode.
      // TODO: reuse the storage buffers in non-default mode.
      const sessionId = this.backend.currentSessionId!;
      let capturedBuffers = this.capturedPendingBuffers.get(sessionId);
      if (!capturedBuffers) {
        capturedBuffers = [];
        this.capturedPendingBuffers.set(sessionId, capturedBuffers);
      }
      for (const buffer of this.buffersPending) {
        capturedBuffers.push(buffer);
      }
    }
    this.buffersPending = [];
  }

  /**
   * Destroy all GPU buffers owned by this manager (free lists, cached data, captured and still pending buffers)
   * and reset the internal state. Buffers that are registered as external are not destroyed because their
   * lifecycle is not managed by the GPU data manager.
   */
  dispose() {
    const destroyOwned = (buffers: Iterable<GPUBuffer>): void => {
      for (const buffer of buffers) {
        if (!this.externalBuffers.has(buffer)) {
          buffer.destroy();
        }
      }
    };

    for (const buffers of this.freeBuffers.values()) {
      destroyOwned(buffers);
    }
    for (const buffers of this.freeUniformBuffers.values()) {
      destroyOwned(buffers);
    }
    for (const storage of this.storageCache.values()) {
      destroyOwned([storage.gpuData.buffer]);
    }
    for (const buffers of this.capturedPendingBuffers.values()) {
      destroyOwned(buffers);
    }
    // Buffers released or used for uploading since the last refreshPendingBuffers() call would otherwise be
    // dropped without ever being destroyed.
    destroyOwned(this.buffersPending);
    destroyOwned(this.buffersForUploadingPending);

    this.storageCache = new Map();
    this.freeBuffers = new Map();
    this.freeUniformBuffers = new Map();
    this.capturedPendingBuffers = new Map();
    this.externalBuffers = new Map();
    this.buffersPending = [];
    this.buffersForUploadingPending = [];
  }

  /**
   * Release session related data: destroys the buffers captured for the session, if any.
   *
   * @param sessionId - specify the session ID.
   */
  onReleaseSession(sessionId: number) {
    // release the captured pending buffers.
    const pendingBuffers = this.capturedPendingBuffers.get(sessionId);
    if (pendingBuffers) {
      for (const buffer of pendingBuffers) {
        buffer.destroy();
      }
      this.capturedPendingBuffers.delete(sessionId);
    }
  }

  /**
   * Put a released buffer into the free list for its size instead of destroying it, so that it can be reused.
   * The buffer is destroyed when its size has no free list or no configured capacity, or when the list is full.
   */
  private recycleOrDestroy(buffer: GPUBuffer, freeLists: Map<number, GPUBuffer[]>): void {
    const freelist = freeLists.get(buffer.size);
    const maxInFreeList = bucketFreelist.get(buffer.size);
    if (freelist === undefined || maxInFreeList === undefined || freelist.length >= maxInFreeList) {
      buffer.destroy();
    } else {
      freelist.push(buffer);
    }
  }
}
/**
 * Factory for the GPU data manager. Forwards its arguments to the `GpuDataManagerImpl` constructor and exposes
 * the result through the `GpuDataManager` interface.
 */
export const createGpuDataManager = (...args: ConstructorParameters<typeof GpuDataManagerImpl>): GpuDataManager => {
  const manager: GpuDataManager = new GpuDataManagerImpl(...args);
  return manager;
};