mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
MLAS: Apply 'small-M' optimization for column-vectors (#2971)
Apply 'small-M' optimization for column-vectors in MlasSgemmOperation
This commit is contained in:
parent
7ff5c0e5a3
commit
d5efbcb8d8
2 changed files with 29 additions and 0 deletions
|
|
@ -850,6 +850,34 @@ Return Value:
|
|||
|
||||
}
|
||||
|
||||
//
|
||||
// Handle the case when both B and C are column-vectors that are contiguous in memory.
|
||||
// Because transposition of such vectors doesn't change their layout, and
|
||||
// Transpose(A*B) = Transpose(B) * Transpose(A), we can apply the same 'small-M'
|
||||
// optimization as above, with A and B flipped.
|
||||
//
|
||||
if (N == 1 && ldb == 1 && ldc == 1 && alpha == 1.0f && (beta == 0.0f || beta == 1.0f)) {
|
||||
|
||||
#if defined(MLAS_TARGET_AMD64)
|
||||
|
||||
PMLAS_SGEMM_KERNEL_M1_ROUTINE SgemmKernelM1Routine;
|
||||
|
||||
if (TransA == CblasNoTrans) {
|
||||
SgemmKernelM1Routine = MlasPlatform.KernelM1TransposeBRoutine;
|
||||
} else {
|
||||
SgemmKernelM1Routine = MlasPlatform.KernelM1Routine;
|
||||
}
|
||||
|
||||
if (SgemmKernelM1Routine != nullptr) {
|
||||
SgemmKernelM1Routine(B, A, C, K, M, lda, beta);
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Compute the strides to step through slices of the input matrices.
|
||||
//
|
||||
|
|
|
|||
|
|
@ -396,6 +396,7 @@ public:
|
|||
for (size_t a = 0; a < _countof(multipliers); a++) {
|
||||
for (size_t b = 0; b < _countof(multipliers); b++) {
|
||||
Test(1, N, K, multipliers[a], multipliers[b]);
|
||||
Test(N, 1, K, multipliers[a], multipliers[b]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue