POWER10: QGEMM optimization (#10642)

* POWER10: QGEMM optimization

This patch makes use of the POWER10 MMA feature for the QGEMM function.
The optimization covers both the signed and unsigned cases. Tested with
gcc 11 and clang 14; there are no new failures.

* Changes as per review comments

Co-authored-by: Rajalakshmi Srinivasaraghavan <rajis@linux.ibm.com>
This commit is contained in:
RajalakshmiSR 2022-03-02 10:36:26 -06:00 committed by GitHub
parent e5c6dc1fc8
commit 5d8c5409ab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 1205 additions and 0 deletions

View file

@ -334,13 +334,16 @@ else()
)
# HAS_P10_RUNTIME is set outside this excerpt. When it is true, platform.cpp and
# qgemm.cpp are compiled with the POWER10 preprocessor macro defined.
if (HAS_P10_RUNTIME)
set_source_files_properties(${MLAS_SRC_DIR}/platform.cpp PROPERTIES COMPILE_FLAGS "-DPOWER10")
set_source_files_properties(${MLAS_SRC_DIR}/qgemm.cpp PROPERTIES COMPILE_FLAGS "-DPOWER10")
endif()
# POWER10-specific kernel sources: SGEMM, DGEMM and the quantized GEMM kernel.
set(mlas_platform_srcs_power10
${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp
${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp
${MLAS_SRC_DIR}/power/qgemm_kernel_power10.cpp
)
# Per-file flags: each POWER10 kernel source is built with -mcpu=power10, so the
# target-specific flag is confined to these files rather than applied globally.
# The SGEMM kernel additionally gets -DSINGLE (NOTE(review): presumably selects
# the single-precision variant of the kernel code -- confirm in the source).
# The quantized GEMM kernel is built at -O3 while the other two use -O2.
set_source_files_properties(${MLAS_SRC_DIR}/power/SgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10 -DSINGLE")
set_source_files_properties(${MLAS_SRC_DIR}/power/DgemmKernelPOWER10.cpp PROPERTIES COMPILE_FLAGS "-O2 -mcpu=power10")
set_source_files_properties(${MLAS_SRC_DIR}/power/qgemm_kernel_power10.cpp PROPERTIES COMPILE_FLAGS "-O3 -mcpu=power10")
set(mlas_platform_srcs
${mlas_platform_srcs}
${mlas_platform_srcs_power10}

View file

@ -704,6 +704,7 @@ extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchUdot;
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchSdot;
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchWasmSimd;
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmQuantDispatchDefault;
extern const MLAS_GEMM_QUANT_DISPATCH MlasGemm8X8DispatchPOWER10;
//
// Symmetric quantized qgemm dispatch structure
@ -849,6 +850,7 @@ struct MLAS_PLATFORM {
#if defined(MLAS_TARGET_POWER)
MLAS_GEMM_DOUBLE_KERNEL* GemmDoubleKernel;
const MLAS_GEMM_QUANT_DISPATCH* GemmU8X8Dispatch;
#endif
#if defined(MLAS_TARGET_AMD64)
MLAS_SGEMM_KERNEL_M1_ROUTINE* KernelM1Routine;

View file

@ -392,6 +392,7 @@ Return Value:
if (HasP10Instructions) {
this->GemmFloatKernel = MlasSgemmKernelPOWER10;
this->GemmDoubleKernel = MlasDgemmKernelPOWER10;
this->GemmU8X8Dispatch = &MlasGemm8X8DispatchPOWER10;
}
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -849,6 +849,12 @@ MlasGemmQuantGetDispatch(
if (!AIsSigned) {
GemmQuantDispatch = &MlasGemmU8X8DispatchWasmSimd;
}
#elif defined(MLAS_TARGET_POWER) && defined(__linux__) && defined(POWER10) && \
((defined(__GNUC__) && ((__GNUC__ > 10) || (__GNUC__== 10 && __GNUC_MINOR__ >= 2))) || \
(defined(__clang__) && (__clang_major__ >= 12)))
if (GetMlasPlatform().GemmU8X8Dispatch == &MlasGemm8X8DispatchPOWER10) {
GemmQuantDispatch = GetMlasPlatform().GemmU8X8Dispatch;
}
#endif
if (nullptr == GemmQuantDispatch) {