mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
POWER10: QGEMM optimization (#10642)
* POWER10: QGEMM optimization This patch makes use of the POWER10 MMA feature for the QGEMM function. This optimization includes signed and unsigned cases. Tested; there are no new failures with gcc11 and clang-14. * Changes as per review comments Co-authored-by: Rajalakshmi Srinivasaraghavan <rajis@linux.ibm.com>
This commit is contained in:
parent
e5c6dc1fc8
commit
5d8c5409ab
5 changed files with 1205 additions and 0 deletions
|
|
@ -334,13 +334,16 @@ else()
|
|||
)
|
||||
if (HAS_P10_RUNTIME)
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/platform.cpp PROPERTIES COMPILE_FLAGS "-DPOWER10")
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/qgemm.cpp PROPERTIES COMPILE_FLAGS "-DPOWER10")
|
||||
endif()
|
||||
set(mlas_platform_srcs_power10
|
||||
${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp
|
||||
${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp
|
||||
${MLAS_SRC_DIR}/power/qgemm_kernel_power10.cpp
|
||||
)
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10 -DSINGLE")
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10")
|
||||
set_source_files_properties(${MLAS_SRC_DIR}/power/qgemm_kernel_power10.cpp PROPERTIES COMPILE_FLAGS "-O3 -mcpu=power10")
|
||||
set(mlas_platform_srcs
|
||||
${mlas_platform_srcs}
|
||||
${mlas_platform_srcs_power10}
|
||||
|
|
|
|||
|
|
@ -704,6 +704,7 @@ extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchUdot;
|
|||
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchSdot;
|
||||
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchWasmSimd;
|
||||
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmQuantDispatchDefault;
|
||||
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemm8X8DispatchPOWER10;
|
||||
|
||||
//
|
||||
// Symmetric quantized qgemm dispatch structure
|
||||
|
|
@ -849,6 +850,7 @@ struct MLAS_PLATFORM {
|
|||
|
||||
#if defined(MLAS_TARGET_POWER)
|
||||
MLAS_GEMM_DOUBLE_KERNEL* GemmDoubleKernel;
|
||||
const MLAS_GEMM_QUANT_DISPATCH* GemmU8X8Dispatch;
|
||||
#endif
|
||||
#if defined(MLAS_TARGET_AMD64)
|
||||
MLAS_SGEMM_KERNEL_M1_ROUTINE* KernelM1Routine;
|
||||
|
|
|
|||
|
|
@ -392,6 +392,7 @@ Return Value:
|
|||
if (HasP10Instructions) {
|
||||
this->GemmFloatKernel = MlasSgemmKernelPOWER10;
|
||||
this->GemmDoubleKernel = MlasDgemmKernelPOWER10;
|
||||
this->GemmU8X8Dispatch = &MlasGemm8X8DispatchPOWER10;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
1193
onnxruntime/core/mlas/lib/power/qgemm_kernel_power10.cpp
Normal file
1193
onnxruntime/core/mlas/lib/power/qgemm_kernel_power10.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -849,6 +849,12 @@ MlasGemmQuantGetDispatch(
|
|||
if (!AIsSigned) {
|
||||
GemmQuantDispatch = &MlasGemmU8X8DispatchWasmSimd;
|
||||
}
|
||||
#elif defined(MLAS_TARGET_POWER) && defined(__linux__) && defined(POWER10) && \
|
||||
((defined(__GNUC__) && ((__GNUC__ > 10) || (__GNUC__== 10 && __GNUC_MINOR__ >= 2))) || \
|
||||
(defined(__clang__) && (__clang_major__ >= 12)))
|
||||
if (GetMlasPlatform().GemmU8X8Dispatch == &MlasGemm8X8DispatchPOWER10) {
|
||||
GemmQuantDispatch = GetMlasPlatform().GemmU8X8Dispatch;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (nullptr == GemmQuantDispatch) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue