From 193415a1625abc8f955771c7cd8dd33650e93d4e Mon Sep 17 00:00:00 2001 From: Jiajia Qin Date: Sat, 22 Jul 2023 04:13:01 +0800 Subject: [PATCH] [js/webgpu] reuse buffer for GpuDataManager (#16746) ### Description Allocating a new GPUBuffer on every session.run is inefficient. Allocation should happen only during the first run; subsequent runs should reuse those buffers. ### Motivation and Context - This PR is for performance: mobilenetv2 improves from 12.9 ms to 9.58 ms. --- .../lib/wasm/jsep/webgpu/gpu-data-manager.ts | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts index 526ae68ac9..784b9a1d54 100644 --- a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts +++ b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts @@ -45,6 +45,11 @@ export interface GpuDataManager { * actually released. */ refreshPendingBuffers(): void; + + /** + * destroy all gpu buffers. Call this when the session.release is called. + */ + dispose(): void; } interface StorageCacheValue { @@ -76,9 +81,12 @@ class GpuDataManagerImpl implements GpuDataManager { // pending buffers for computing private buffersPending: GPUBuffer[]; - constructor(private backend: WebGpuBackend /* , private reuseBuffer: boolean */) { + private freeBuffers: Map; + + constructor(private backend: WebGpuBackend) { this.storageCache = new Map(); this.downloadCache = new Map(); + this.freeBuffers = new Map(); this.buffersForUploadingPending = []; this.buffersPending = []; } @@ -144,15 +152,20 @@ class GpuDataManagerImpl implements GpuDataManager { // eslint-disable-next-line no-bitwise create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData { - // !!! - // !!! IMPORTANT: TODO: whether we should keep the storage buffer every time, or always create new ones. - // !!! This need to be figured out by performance test results. - // !!! 
- const bufferSize = calcNormalizedBufferSize(size); - // create gpu buffer - const gpuBuffer = this.backend.device.createBuffer({size: bufferSize, usage}); + let gpuBuffer; + let buffers = this.freeBuffers.get(bufferSize); + if (!buffers) { + buffers = []; + this.freeBuffers.set(bufferSize, buffers); + } + if (buffers.length > 0) { + gpuBuffer = buffers.pop() as GPUBuffer; + } else { + // create gpu buffer + gpuBuffer = this.backend.device.createBuffer({size: bufferSize, usage}); + } const gpuData = {id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer}; this.storageCache.set(gpuData.id, {gpuData, originalSize: size}); @@ -223,11 +236,31 @@ class GpuDataManagerImpl implements GpuDataManager { refreshPendingBuffers(): void { for (const buffer of this.buffersForUploadingPending) { + // upload buffer is only useful in the session creation time. So we don't need to reuse them in session running. buffer.destroy(); } + this.buffersForUploadingPending = []; for (const buffer of this.buffersPending) { - buffer.destroy(); + // Put the pending buffer to freeBuffers list instead of really destroying it for buffer reusing. + this.freeBuffers.get(buffer.size)!.push(buffer); } + this.buffersPending = []; + } + + dispose() { + this.freeBuffers.forEach((buffers) => { + buffers.forEach(buffer => { + buffer.destroy(); + }); + }); + + this.storageCache.forEach((storage) => { + storage.gpuData.buffer.destroy(); + }); + + this.storageCache = new Map(); + this.downloadCache = new Map(); + this.freeBuffers = new Map(); } }