mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
[js/webgpu] reuse buffer for GpuDataManager (#16746)
### Description <!-- Describe your changes. --> Allocating a new GPUBuffer on every session.run call is inefficient. Allocation should happen only during the first run; subsequent runs should reuse those buffers. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> - This PR is a performance improvement: mobilenetv2 improves from 12.9 ms to 9.58 ms.
This commit is contained in:
parent
d79515041c
commit
193415a162
1 changed file with 42 additions and 9 deletions
|
|
@ -45,6 +45,11 @@ export interface GpuDataManager {
|
|||
* actually released.
|
||||
*/
|
||||
refreshPendingBuffers(): void;
|
||||
|
||||
/**
|
||||
* destroy all gpu buffers. Call this when the session.release is called.
|
||||
*/
|
||||
dispose(): void;
|
||||
}
|
||||
|
||||
interface StorageCacheValue {
|
||||
|
|
@ -76,9 +81,12 @@ class GpuDataManagerImpl implements GpuDataManager {
|
|||
// pending buffers for computing
|
||||
private buffersPending: GPUBuffer[];
|
||||
|
||||
constructor(private backend: WebGpuBackend /* , private reuseBuffer: boolean */) {
|
||||
private freeBuffers: Map<number, GPUBuffer[]>;
|
||||
|
||||
constructor(private backend: WebGpuBackend) {
|
||||
this.storageCache = new Map();
|
||||
this.downloadCache = new Map();
|
||||
this.freeBuffers = new Map();
|
||||
this.buffersForUploadingPending = [];
|
||||
this.buffersPending = [];
|
||||
}
|
||||
|
|
@ -144,15 +152,20 @@ class GpuDataManagerImpl implements GpuDataManager {
|
|||
|
||||
// eslint-disable-next-line no-bitwise
|
||||
create(size: number, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST): GpuData {
|
||||
// !!!
|
||||
// !!! IMPORTANT: TODO: whether we should keep the storage buffer every time, or always create new ones.
|
||||
// !!! This need to be figured out by performance test results.
|
||||
// !!!
|
||||
|
||||
const bufferSize = calcNormalizedBufferSize(size);
|
||||
|
||||
// create gpu buffer
|
||||
const gpuBuffer = this.backend.device.createBuffer({size: bufferSize, usage});
|
||||
let gpuBuffer;
|
||||
let buffers = this.freeBuffers.get(bufferSize);
|
||||
if (!buffers) {
|
||||
buffers = [];
|
||||
this.freeBuffers.set(bufferSize, buffers);
|
||||
}
|
||||
if (buffers.length > 0) {
|
||||
gpuBuffer = buffers.pop() as GPUBuffer;
|
||||
} else {
|
||||
// create gpu buffer
|
||||
gpuBuffer = this.backend.device.createBuffer({size: bufferSize, usage});
|
||||
}
|
||||
|
||||
const gpuData = {id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer};
|
||||
this.storageCache.set(gpuData.id, {gpuData, originalSize: size});
|
||||
|
|
@ -223,11 +236,31 @@ class GpuDataManagerImpl implements GpuDataManager {
|
|||
|
||||
refreshPendingBuffers(): void {
|
||||
for (const buffer of this.buffersForUploadingPending) {
|
||||
// upload buffer is only useful in the session creation time. So we don't need to reuse them in session running.
|
||||
buffer.destroy();
|
||||
}
|
||||
this.buffersForUploadingPending = [];
|
||||
for (const buffer of this.buffersPending) {
|
||||
buffer.destroy();
|
||||
// Put the pending buffer to freeBuffers list instead of really destroying it for buffer reusing.
|
||||
this.freeBuffers.get(buffer.size)!.push(buffer);
|
||||
}
|
||||
this.buffersPending = [];
|
||||
}
|
||||
|
||||
dispose() {
|
||||
this.freeBuffers.forEach((buffers) => {
|
||||
buffers.forEach(buffer => {
|
||||
buffer.destroy();
|
||||
});
|
||||
});
|
||||
|
||||
this.storageCache.forEach((storage) => {
|
||||
storage.gpuData.buffer.destroy();
|
||||
});
|
||||
|
||||
this.storageCache = new Map();
|
||||
this.downloadCache = new Map();
|
||||
this.freeBuffers = new Map();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue