MLAS: rename AVX512BW->AVX512Core (#3216)

Cleanup change: rename functions and files from Avx512BW to Avx512Core.
This commit is contained in:
Tracy Sharpe 2020-03-13 22:45:51 -07:00 committed by GitHub
parent 2a6e5ce978
commit 88c20eaef1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 81 additions and 78 deletions

View file

@ -48,12 +48,12 @@ if(MSVC)
set(mlas_platform_srcs
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8S8KernelAvx2.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemvU8S8KernelAvx2.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8S8KernelAvx512BW.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemvU8S8KernelAvx512BW.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8S8KernelAvx512Core.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemvU8S8KernelAvx512Core.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8S8KernelAvx512Vnni.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemvU8S8KernelAvx512Vnni.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8U8KernelAvx2.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8U8KernelAvx512BW.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8U8KernelAvx512Core.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/QgemmU8U8KernelAvx512Vnni.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/DgemmKernelSse2.asm
${ONNXRUNTIME_ROOT}/core/mlas/lib/amd64/DgemmKernelAvx.asm
@ -185,25 +185,24 @@ else()
)
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
# Some platforms do not support AVX512 flags but still able to compile the source
# Others support the flag and refuse to compile without the flag.
# We have to run all 3 checks
# Some toolchains do not support AVX512 compiler flags but are still able
# to build the sources. Other toolchains require the AVX512 compiler flags
# to be specified.
check_cxx_compiler_flag("-mavx512f" HAS_AVX512F)
if(HAS_AVX512F)
set(CMAKE_REQUIRED_FLAGS "-mavx512f")
else()
set(CMAKE_REQUIRED_FLAGS "")
endif()
check_cxx_source_compiles("
int main() {
asm(\"vpxord %zmm0,%zmm0,%zmm0\");
return 0;
}"
AVX512F_COMPILES
COMPILES_AVX512F
)
if(AVX512F_COMPILES)
if(COMPILES_AVX512F)
set(mlas_platform_srcs_avx512f
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/DgemmKernelAvx512F.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/SgemmKernelAvx512F.S
@ -214,46 +213,44 @@ else()
set_source_files_properties(${mlas_platform_srcs_avx512f} PROPERTIES COMPILE_FLAGS "-mavx512f")
endif()
# AVX512 core (BW/DQ/VL) support is only available if AVX512F support is present.
check_cxx_compiler_flag("-mavx512bw" HAS_AVX512BW)
if(HAS_AVX512BW)
set(CMAKE_REQUIRED_FLAGS "-mavx512bw")
check_cxx_compiler_flag("-mavx512bw -mavx512dq -mavx512vl" HAS_AVX512CORE)
if(HAS_AVX512CORE)
set(CMAKE_REQUIRED_FLAGS "-mavx512bw -mavx512dq -mavx512vl")
endif()
check_cxx_source_compiles("
int main() {
asm(\"vpmaddwd %zmm0,%zmm0,%zmm0\");
asm(\"vpmaddwd %zmm0,%zmm0,%zmm0\"); // AVX512BW feature
asm(\"vandnps %xmm31,%xmm31,%xmm31\"); // AVX512DQ/AVX512VL feature
return 0;
}"
AVX512BW_COMPILES
COMPILES_AVX512CORE
)
if(AVX512BW_COMPILES)
set(mlas_platform_srcs_avx512bw
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8S8KernelAvx512BW.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemvU8S8KernelAvx512BW.S
if(COMPILES_AVX512CORE)
set(mlas_platform_srcs_avx512core
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8S8KernelAvx512Core.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemvU8S8KernelAvx512Core.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8S8KernelAvx512Vnni.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemvU8S8KernelAvx512Vnni.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8U8KernelAvx512BW.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8U8KernelAvx512Core.S
${ONNXRUNTIME_ROOT}/core/mlas/lib/x86_64/QgemmU8U8KernelAvx512Vnni.S
)
if(HAS_AVX512BW)
set_source_files_properties(${mlas_platform_srcs_avx512bw} PROPERTIES COMPILE_FLAGS "-mavx512bw")
if(HAS_AVX512CORE)
set_source_files_properties(${mlas_platform_srcs_avx512core} PROPERTIES COMPILE_FLAGS "-mavx512bw -mavx512dq -mavx512vl")
endif()
else() # AVX512BW_COMPILES
#
set_source_files_properties(${mlas_common_srcs} PROPERTIES COMPILE_FLAGS "-DMLAS_AVX512BW_UNSUPPORTED")
endif() # AVX512BW_COMPILES
else() # AVX512F_COMPILES
else()
set_source_files_properties(${mlas_common_srcs} PROPERTIES COMPILE_FLAGS "-DMLAS_AVX512CORE_UNSUPPORTED")
endif()
else()
set_source_files_properties(${mlas_common_srcs} PROPERTIES COMPILE_FLAGS "-DMLAS_AVX512F_UNSUPPORTED")
endif() # AVX512F_COMPILES
endif()
set(mlas_platform_srcs
${mlas_platform_srcs_sse2}
${mlas_platform_srcs_avx}
${mlas_platform_srcs_avx2}
${mlas_platform_srcs_avx512f}
${mlas_platform_srcs_avx512bw}
${mlas_platform_srcs_avx512core}
)
endif()
endif()

View file

@ -11,7 +11,7 @@
; Abstract:
;
; This module contains common kernel macros and structures for the quantized
; integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
; integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
; AVX512VNNI kernels.
;
;--

View file

@ -6,14 +6,14 @@
;
; Module Name:
;
; QgemmU8S8KernelAvx512BW.asm
; QgemmU8S8KernelAvx512Core.asm
;
; Abstract:
;
; This module implements the kernels for the quantized integer matrix/matrix
; multiply operation (QGEMM).
;
; This implementation uses AVX512BW instructions.
; This implementation uses AVX512 core instructions (BW/DQ/VL).
;
;--
@ -125,6 +125,6 @@ ENDIF
; Generate the GEMM kernel.
;
GemmU8X8KernelAvx512Function U8S8, Avx512BW
GemmU8X8KernelAvx512Function U8S8, Avx512Core
END

View file

@ -11,7 +11,7 @@
; Abstract:
;
; This module contains common kernel macros and structures for the quantized
; integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
; integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
; AVX512VNNI kernels.
;
;--

View file

@ -6,14 +6,14 @@
;
; Module Name:
;
; QgemmU8U8KernelAvx512BW.asm
; QgemmU8U8KernelAvx512Core.asm
;
; Abstract:
;
; This module implements the kernels for the quantized integer matrix/matrix
; multiply operation (QGEMM).
;
; This implementation uses AVX512BW instructions.
; This implementation uses AVX512 core instructions (BW/DQ/VL).
;
;--
@ -122,6 +122,6 @@ ENDIF
; Generate the GEMM kernel.
;
GemmU8X8KernelAvx512Function U8U8, Avx512BW
GemmU8X8KernelAvx512Function U8U8, Avx512Core
END

View file

@ -11,7 +11,7 @@
; Abstract:
;
; This module contains common kernel macros and structures for the quantized
; integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
; integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
; AVX512VNNI kernels.
;
;--
@ -369,7 +369,7 @@ GemmU8X8KernelAvx512Function MACRO Type, Isa
mov esi,-1
kmovw k1,esi ; update mask to write all columns
IFIDNI <Type>, <U8S8>
IFIDNI <Isa>, <Avx512BW>
IFIDNI <Isa>, <Avx512Core>
neg esi
vpbroadcastw zmm5,esi ; generate 512-bit word vector [0x0001]
ENDIF

View file

@ -11,7 +11,7 @@
; Abstract:
;
; This module contains common kernel macros and structures for the quantized
; integer matrix/vector multiply operation (QGEMV) for the AVX512BW and
; integer matrix/vector multiply operation (QGEMV) for the AVX512 core and
; AVX512VNNI kernels.
;
;--
@ -93,7 +93,7 @@ GemvU8S8KernelAvx512Function MACRO Isa
kmovw k1,eax ; compute vector load/store mask
mov rcx,GemvU8S8KernelFrame.ldb[rsp]
mov r11,rsp ; set ZeroMode to any non-zero value
IFIDNI <Isa>, <Avx512BW>
IFIDNI <Isa>, <Avx512Core>
mov eax,1
vpbroadcastw zmm29,eax
ENDIF
@ -136,7 +136,7 @@ ProcessColumnLoop4By64:
vpunpckhwd zmm17,zmm20,zmm22
vpunpcklwd zmm18,zmm21,zmm23
vpunpckhwd zmm19,zmm21,zmm23
IFIDNI <Isa>, <Avx512BW>
IFIDNI <Isa>, <Avx512Core>
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
vpmaddubsw zmm17,zmm28,zmm17
@ -248,7 +248,7 @@ ComputeOutput4By16:
vinserti128 ymm5,ymm5,xmm1,1 ; concatenate 256-bit vector
vinserti128 ymm3,ymm3,xmm2,1
vshufi32x4 zmm16,zmm5,zmm3,044h ; concatenate 512-bit vector
IFIDNI <Isa>, <Avx512BW>
IFIDNI <Isa>, <Avx512Core>
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
ELSE
@ -337,7 +337,7 @@ ComputeOutputSmallKBy16:
vinserti128 ymm5,ymm5,xmm1,1 ; concatenate 256-bit vector
vinserti128 ymm3,ymm3,xmm2,1
vshufi32x4 zmm16,zmm5,zmm3,044h ; concatenate 512-bit vector
IFIDNI <Isa>, <Avx512BW>
IFIDNI <Isa>, <Avx512Core>
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
ELSE

View file

@ -6,14 +6,14 @@
;
; Module Name:
;
; QgemvU8S8KernelAvx512BW.asm
; QgemvU8S8KernelAvx512Core.asm
;
; Abstract:
;
; This module implements the kernels for the quantized integer matrix/vector
; multiply operation (QGEMV).
;
; This implementation uses AVX512BW instructions.
; This implementation uses AVX512 core instructions (BW/DQ/VL).
;
;--
@ -26,6 +26,6 @@ INCLUDE QgemvU8S8KernelAvx512Common.inc
; Generate the GEMV kernel.
;
GemvU8S8KernelAvx512Function Avx512BW
GemvU8S8KernelAvx512Function Avx512Core
END

View file

@ -493,14 +493,14 @@ extern "C" {
MLAS_GEMM_U8S8_COPY_PACKB_ROUTINE MlasGemmU8S8CopyPackBAvx2;
MLAS_GEMM_U8S8_KERNEL MlasGemmU8S8KernelAvx2;
MLAS_GEMV_U8S8_KERNEL MlasGemvU8S8KernelAvx2;
MLAS_GEMM_U8S8_KERNEL MlasGemmU8S8KernelAvx512BW;
MLAS_GEMV_U8S8_KERNEL MlasGemvU8S8KernelAvx512BW;
MLAS_GEMM_U8S8_KERNEL MlasGemmU8S8KernelAvx512Core;
MLAS_GEMV_U8S8_KERNEL MlasGemvU8S8KernelAvx512Core;
MLAS_GEMM_U8S8_KERNEL MlasGemmU8S8KernelAvx512Vnni;
MLAS_GEMV_U8S8_KERNEL MlasGemvU8S8KernelAvx512Vnni;
MLAS_GEMM_U8U8_COPY_PACKA_ROUTINE MlasGemmU8U8CopyPackAAvx2;
MLAS_GEMM_U8U8_COPY_PACKB_ROUTINE MlasGemmU8U8CopyPackBAvx2;
MLAS_GEMM_U8U8_KERNEL MlasGemmU8U8KernelAvx2;
MLAS_GEMM_U8U8_KERNEL MlasGemmU8U8KernelAvx512BW;
MLAS_GEMM_U8U8_KERNEL MlasGemmU8U8KernelAvx512Core;
MLAS_GEMM_U8U8_KERNEL MlasGemmU8U8KernelAvx512Vnni;
#endif
#endif

View file

@ -211,16 +211,19 @@ Return Value:
this->PoolFloatKernel[MlasAveragePoolingIncludePad] = MlasPoolAverageIncludePadFloatKernelAvx512F;
this->NchwcBlockSize = 16;
this->PreferredBufferAlignment = 64;
//
// Check if the processor supports AVX512BW.
//
#if !defined(MLAS_AVX512BW_UNSUPPORTED)
if ((Cpuid7[1] & 0x40000000) != 0) {
//
// Check if the processor supports AVX512 core features
// (AVX512BW/AVX512DQ/AVX512VL).
//
this->GemmU8S8Kernel = MlasGemmU8S8KernelAvx512BW;
this->GemvU8S8Kernel = MlasGemvU8S8KernelAvx512BW;
this->GemmU8U8Kernel = MlasGemmU8U8KernelAvx512BW;
#if !defined(MLAS_AVX512CORE_UNSUPPORTED)
if ((Cpuid7[1] & 0xC0020000) == 0xC0020000) {
this->GemmU8S8Kernel = MlasGemmU8S8KernelAvx512Core;
this->GemvU8S8Kernel = MlasGemvU8S8KernelAvx512Core;
this->GemmU8U8Kernel = MlasGemmU8U8KernelAvx512Core;
//
// Check if the processor supports AVX512VNNI.
@ -233,8 +236,11 @@ Return Value:
this->GemmU8U8Kernel = MlasGemmU8U8KernelAvx512Vnni;
}
}
#endif // MLAS_AVX512BW_UNSUPPORTED
#endif // MLAS_AVX512CORE_UNSUPPORTED
}
#endif // MLAS_AVX512F_UNSUPPORTED
}

View file

@ -11,7 +11,7 @@ Module Name:
Abstract:
This module contains common kernel macros and structures for the quantized
integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
AVX512VNNI kernels.
--*/

View file

@ -6,14 +6,14 @@ Licensed under the MIT License.
Module Name:
QgemmU8S8KernelAvx512BW.s
QgemmU8S8KernelAvx512Core.s
Abstract:
This module implements the kernels for the quantized integer matrix/matrix
multiply operation (QGEMM).
This implementation uses AVX512BW instructions.
This implementation uses AVX512 core instructions (BW/DQ/VL).
--*/
@ -131,6 +131,6 @@ Implicit Arguments:
// Generate the GEMM kernel.
//
GemmU8X8KernelAvx512Function U8S8, Avx512BW
GemmU8X8KernelAvx512Function U8S8, Avx512Core
.end

View file

@ -11,7 +11,7 @@ Module Name:
Abstract:
This module contains common kernel macros and structures for the quantized
integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
AVX512VNNI kernels.
--*/

View file

@ -6,14 +6,14 @@ Licensed under the MIT License.
Module Name:
QgemmU8U8KernelAvx512BW.s
QgemmU8U8KernelAvx512Core.s
Abstract:
This module implements the kernels for the quantized integer matrix/matrix
multiply operation (QGEMM).
This implementation uses AVX512BW instructions.
This implementation uses AVX512 core instructions (BW/DQ/VL).
--*/
@ -128,6 +128,6 @@ Implicit Arguments:
// Generate the GEMM kernel.
//
GemmU8X8KernelAvx512Function U8U8, Avx512BW
GemmU8X8KernelAvx512Function U8U8, Avx512Core
.end

View file

@ -11,7 +11,7 @@ Module Name:
Abstract:
This module contains common kernel macros and structures for the quantized
integer matrix/matrix multiply operation (QGEMM) for the AVX512BW and
integer matrix/matrix multiply operation (QGEMM) for the AVX512 core and
AVX512VNNI kernels.
--*/
@ -343,7 +343,7 @@ C_UNDERSCORE(MlasGemm\Type\()Kernel\Isa\()):
mov ebp,-1
kmovw k1,ebp # update mask to write all columns
.ifeqs "\Type\()", "U8S8"
.ifeqs "\Isa\()", "Avx512BW"
.ifeqs "\Isa\()", "Avx512Core"
neg ebp
vpbroadcastw zmm5,ebp # generate 512-bit word vector [0x0001]
.endif

View file

@ -11,7 +11,7 @@ Module Name:
Abstract:
This module contains common kernel macros and structures for the quantized
integer matrix/vector multiply operation (QGEMV) for the AVX512BW and
integer matrix/vector multiply operation (QGEMV) for the AVX512 core and
AVX512VNNI kernels.
--*/
@ -83,7 +83,7 @@ C_UNDERSCORE(MlasGemvU8S8Kernel\Isa\()):
mov rcx,rbx
mov r10,rdx
mov r11,rsp # set ZeroMode to any non-zero value
.ifeqs "\Isa\()", "Avx512BW"
.ifeqs "\Isa\()", "Avx512Core"
mov eax,1
vpbroadcastw zmm29,eax
.endif
@ -126,7 +126,7 @@ C_UNDERSCORE(MlasGemvU8S8Kernel\Isa\()):
vpunpckhwd zmm17,zmm20,zmm22
vpunpcklwd zmm18,zmm21,zmm23
vpunpckhwd zmm19,zmm21,zmm23
.ifeqs "\Isa\()", "Avx512BW"
.ifeqs "\Isa\()", "Avx512Core"
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
vpmaddubsw zmm17,zmm28,zmm17
@ -234,7 +234,7 @@ C_UNDERSCORE(MlasGemvU8S8Kernel\Isa\()):
vinserti128 ymm5,ymm5,xmm1,1 # concatenate 256-bit vector
vinserti128 ymm3,ymm3,xmm2,1
vshufi32x4 zmm16,zmm5,zmm3,0x44 # concatenate 512-bit vector
.ifeqs "\Isa\()", "Avx512BW"
.ifeqs "\Isa\()", "Avx512Core"
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
.else
@ -323,7 +323,7 @@ C_UNDERSCORE(MlasGemvU8S8Kernel\Isa\()):
vinserti128 ymm5,ymm5,xmm1,1 # concatenate 256-bit vector
vinserti128 ymm3,ymm3,xmm2,1
vshufi32x4 zmm16,zmm5,zmm3,0x44 # concatenate 512-bit vector
.ifeqs "\Isa\()", "Avx512BW"
.ifeqs "\Isa\()", "Avx512Core"
vpmaddubsw zmm16,zmm28,zmm16
vpmaddwd zmm20,zmm16,zmm29
.else

View file

@ -6,14 +6,14 @@ Licensed under the MIT License.
Module Name:
QgemvU8S8KernelAvx512BW.s
QgemvU8S8KernelAvx512Core.s
Abstract:
This module implements the kernels for the quantized integer matrix/vector
multiply operation (QGEMV).
This implementation uses AVX512BW instructions.
This implementation uses AVX512 core instructions (BW/DQ/VL).
--*/
@ -28,6 +28,6 @@ Abstract:
// Generate the GEMV kernel.
//
GemvU8S8KernelAvx512Function Avx512BW
GemvU8S8KernelAvx512Function Avx512Core
.end