From 6e1c3003ffc5a72d06c95ee381694e198ee0a2fb Mon Sep 17 00:00:00 2001 From: Dale Phurrough Date: Fri, 2 Jun 2023 01:32:55 +0200 Subject: [PATCH] DML EP and MLAS buffer allocator - increase alignment to 64 bytes for AVX-512 processing (#15141) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #13119 top concerns by * using `onnxruntime::AllocatorDefaultAlloc` instead of `malloc` * setting `MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT=64`, which cascades that value to several members and functions not directly related to MLAS. ### Motivation and Context * Fixes #13119 top concerns. Otherwise, alignment stays at 16 bytes, circa 1990s 👴 * Does not yet enable flexible alignment. Instead, it is fixed at 64 bytes (64 x 8 bits = 512 bits) for modern NN hardware like AVX-512 --- onnxruntime/core/mlas/lib/mlasi.h | 2 +- .../src/BucketizedBufferAllocator.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/mlas/lib/mlasi.h b/onnxruntime/core/mlas/lib/mlasi.h index ea620877ed..7155a9dd27 100644 --- a/onnxruntime/core/mlas/lib/mlasi.h +++ b/onnxruntime/core/mlas/lib/mlasi.h @@ -782,7 +782,7 @@ extern "C" { // value. 
// -#define MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT 32 +#define MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT 64 // // Define the target number of per-thread multiplies before using another diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/BucketizedBufferAllocator.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/BucketizedBufferAllocator.cpp index f5abeb5d7b..aae73dca46 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/BucketizedBufferAllocator.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/BucketizedBufferAllocator.cpp @@ -244,17 +244,12 @@ namespace Dml void* CPUAllocator::Alloc(size_t size) { - if (size <= 0) - { - return nullptr; - } - void* p = malloc(size); - return p; + return onnxruntime::AllocatorDefaultAlloc(size); } void CPUAllocator::Free(void* p) { - free(p); + return onnxruntime::AllocatorDefaultFree(p); } } // namespace Dml