mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-23 22:13:38 +00:00
DML EP and MLAS buffer allocator - increase alignment to 64 bytes for AVX-512 processing (#15141)
Fixes the top concerns of #13119 by:
* using `onnxruntime::AllocatorDefaultAlloc` instead of `malloc`
* setting `MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT=64`, which cascades that value to several members and functions not directly related to MLAS.

### Motivation and Context
* Fixes the top concerns of #13119. Otherwise, alignment is to 16 bytes, circa 1990s 👴
* Does not yet enable flexible alignment. Instead, alignment is fixed at 64 bytes (64 x 8 bits = 512 bits) for modern NN hardware like AVX-512.
This commit is contained in:
parent
5a4c3b7937
commit
6e1c3003ff
2 changed files with 3 additions and 8 deletions
|
|
@@ -782,7 +782,7 @@ extern "C" {
|
|||
// value.
|
||||
//
|
||||
|
||||
#define MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT 32
|
||||
#define MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT 64
|
||||
|
||||
//
|
||||
// Define the target number of per-thread multiplies before using another
|
||||
|
|
|
|||
|
|
@@ -244,17 +244,12 @@ namespace Dml
|
|||
|
||||
void* CPUAllocator::Alloc(size_t size)
|
||||
{
|
||||
if (size <= 0)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
void* p = malloc(size);
|
||||
return p;
|
||||
return onnxruntime::AllocatorDefaultAlloc(size);
|
||||
}
|
||||
|
||||
void CPUAllocator::Free(void* p)
|
||||
{
|
||||
free(p);
|
||||
return onnxruntime::AllocatorDefaultFree(p);
|
||||
}
|
||||
|
||||
} // namespace Dml
|
||||
|
|
|
|||
Loading…
Reference in a new issue