From bcffb5aa1df30190d10eb6b2d05ff1f9fb27d8a0 Mon Sep 17 00:00:00 2001 From: cpuhrsch Date: Fri, 4 May 2018 14:41:09 +0000 Subject: [PATCH] Remove SLEEF and all dependent code paths (#7268) Temporarily remove this dependency. --- aten/src/ATen/CMakeLists.txt | 9 +- aten/src/ATen/cpu/sleef/CMakeLists.txt | 71 ---- aten/src/ATen/cpu/sleef/Configure.cmake | 373 -------------------- aten/src/ATen/cpu/vec256/vec256_double.h | 31 -- aten/src/ATen/cpu/vec256/vec256_float.h | 31 -- aten/src/ATen/native/UnaryOps.cpp | 85 ++--- aten/src/ATen/native/cpu/UnaryOpsKernel.cpp | 102 ------ aten/src/ATen/native/cpu/UnaryOpsKernel.h | 49 --- third_party/sleef | 1 - 9 files changed, 39 insertions(+), 713 deletions(-) delete mode 100644 aten/src/ATen/cpu/sleef/CMakeLists.txt delete mode 100644 aten/src/ATen/cpu/sleef/Configure.cmake delete mode 100644 aten/src/ATen/native/cpu/UnaryOpsKernel.cpp delete mode 100644 aten/src/ATen/native/cpu/UnaryOpsKernel.h delete mode 160000 third_party/sleef diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index a6567234797..6fd48aad337 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -17,6 +17,8 @@ ENDIF() IF(NOT MSVC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-qualifiers") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-ignored-qualifiers") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-absolute-value") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-absolute-value") ENDIF(NOT MSVC) ######################## @@ -366,13 +368,6 @@ if (NOT TARGET cpuinfo) endif() TARGET_LINK_LIBRARIES(ATen cpuinfo) -# ---[ Configure SLEEF -IF(NOT TARGET sleef) - add_subdirectory("cpu/sleef") - include_directories(SYSTEM ${CMAKE_BINARY_DIR}/include) -ENDIF() -TARGET_LINK_LIBRARIES(ATen sleef) - IF(CUDA_FOUND) IF ($ENV{ATEN_STATIC_CUDA}) # CuFFT has a complicated static story (especially around CUDA < 9) because it has device callback support diff --git a/aten/src/ATen/cpu/sleef/CMakeLists.txt b/aten/src/ATen/cpu/sleef/CMakeLists.txt deleted file mode 100644 index e4571472d70..00000000000 --- a/aten/src/ATen/cpu/sleef/CMakeLists.txt +++ /dev/null @@ -1,71 +0,0 @@ -IF(MSVC) - option(BUILD_SHARED_LIBS "Build shared libs" ON) -ELSE(MSVC) - option(BUILD_SHARED_LIBS "Build shared libs" OFF) -ENDIF(MSVC) -option(SLEEF_SHOW_ERROR_LOG "Show cmake error log." OFF) - -set(SLEEF_VERSION_MAJOR 3) -set(SLEEF_VERSION_MINOR 2) -set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR}) - -# Sanity check for in-source builds which we do not want to happen -if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) - message(FATAL_ERROR "SLEEF does not allow in-source builds. -You can refer to doc/build-with-cmake.md for instructions on how provide a \ -separate build directory. Note: Please remove autogenerated file \ -`CMakeCache.txt` and directory `CMakeFiles` in the current directory.") -endif() - -# Set output directories for the library files -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) - -foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES}) - string(TOUPPER ${CONFIG} CONFIG) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/bin) -endforeach(CONFIG CMAKE_CONFIGURATION_TYPES) - -# Path for finding cmake modules -set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../third_party/sleef/cmake/Modules) -set(SLEEF_SCRIPT_PATH ${PROJECT_SOURCE_DIR}/../third_party/sleef/cmake/Scripts CACHE PATH - "Path for finding sleef specific cmake scripts") - -# sleef-config.h.in passes cmake settings to the source code -include(${CMAKE_CURRENT_SOURCE_DIR}/Configure.cmake) -configure_file( - ${PROJECT_SOURCE_DIR}/../third_party/sleef/sleef-config.h.in - ${PROJECT_BINARY_DIR}/include/sleef-config.h @ONLY) - -# Generates object file (shared library) `libsleef` -# Defined in src/libm/CMakeLists.txt via command add_library -set(TARGET_LIBSLEEF "sleef") -set(TARGET_LIBSLEEFGNUABI "sleefgnuabi") -# Generates the sleef.h headers and all the rename headers -# Defined in src/libm/CMakeLists.txt via custom commands and a custom target -set(TARGET_HEADERS "headers") -set(TARGET_MKRENAME "mkrename") -set(TARGET_MKRENAME_GNUABI "mkrename_gnuabi") -set(TARGET_MKMASKED_GNUABI "mkmasked_gnuabi") -set(TARGET_MKDISP "mkdisp") -set(TARGET_MKALIAS "mkalias") -set(TARGET_LIBCOMMON_OBJ "common") -set(TARGET_LIBARRAYMAP_OBJ "arraymap") - -function(add_host_executable TARGETNAME) - if (NOT CMAKE_CROSSCOMPILING) - add_executable(${TARGETNAME} ${ARGN}) - else() - add_executable(${TARGETNAME} IMPORTED) - set_property(TARGET ${TARGETNAME} PROPERTY IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME}) - endif() -endfunction() - -include_directories("${PROJECT_SOURCE_DIR}/../third_party/sleef/src/common") -include_directories("${PROJECT_SOURCE_DIR}/../third_party/sleef/src/arch") - -add_subdirectory("${PROJECT_SOURCE_DIR}/../third_party/sleef/src/libm" "${CMAKE_CURRENT_BINARY_DIR}/sleef/libm") -add_subdirectory("${PROJECT_SOURCE_DIR}/../third_party/sleef/src/common" "${CMAKE_CURRENT_BINARY_DIR}/sleef/common") diff --git a/aten/src/ATen/cpu/sleef/Configure.cmake b/aten/src/ATen/cpu/sleef/Configure.cmake deleted file mode 100644 index 4c8a75eda18..00000000000 --- a/aten/src/ATen/cpu/sleef/Configure.cmake +++ /dev/null @@ -1,373 +0,0 @@ -include(CheckCCompilerFlag) -include(CheckCSourceCompiles) -include(CheckTypeSize) - -# Some toolchains require explicit linking of the libraries following. -find_library(LIB_MPFR mpfr) -find_library(LIBM m) -find_library(LIBGMP gmp) -find_library(LIBRT rt) - -find_path(MPFR_INCLUDE_DIR - NAMES mpfr.h - ONLY_CMAKE_FIND_ROOT_PATH) - -if (NOT LIBM) - set(LIBM "") -endif() - -if (NOT LIBRT) - set(LIBRT "") -endif() - -# The library currently supports the following SIMD architectures -set(SLEEF_SUPPORTED_EXTENSIONS - AVX2 AVX2128 AVX SSE4 SSE2 # x86 - ADVSIMD SVE # Aarch64 - NEON32 # Aarch32 - CACHE STRING "List of SIMD architectures supported by libsleef." - ) -set(SLEEF_SUPPORTED_GNUABI_EXTENSIONS - SSE2 AVX AVX2 ADVSIMD SVE - CACHE STRING "List of SIMD architectures supported by libsleef for GNU ABI." -) - -# Force set default build type if none was specified -# Note: some sleef code requires the optimisation flags turned on -if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Setting build type to 'Release' (required for full support).") - set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "RelWithDebInfo" "MinSizeRel") -endif() - -# Function used to generate safe command arguments for add_custom_command -function(command_arguments PROPNAME) - set(quoted_args "") - foreach(arg ${ARGN}) - list(APPEND quoted_args "\"${arg}\"" ) - endforeach() - set(${PROPNAME} ${quoted_args} PARENT_SCOPE) -endfunction() - -# PLATFORM DETECTION -if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64")) - set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.") - - set(SLEEF_HEADER_LIST - SSE_ - SSE2 - SSE4 - AVX_ - AVX - AVX2 - AVX2128 - ) - command_arguments(HEADER_PARAMS_SSE_ 2 4 __m128d __m128 __m128i __m128i __SSE2__) - command_arguments(HEADER_PARAMS_SSE2 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2) - command_arguments(HEADER_PARAMS_SSE4 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4) - command_arguments(HEADER_PARAMS_AVX_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__) - command_arguments(HEADER_PARAMS_AVX 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__ avx) - command_arguments(HEADER_PARAMS_AVX2 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2) - command_arguments(HEADER_PARAMS_AVX2128 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128) - -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.") - # Aarch64 requires support for advsimdfma4 - set(COMPILER_SUPPORTS_ADVSIMD 1) - - set(SLEEF_HEADER_LIST - ADVSIMD_ - ADVSIMD - SVE - ) - command_arguments(HEADER_PARAMS_ADVSIMD_ 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON) - command_arguments(HEADER_PARAMS_ADVSIMD 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON advsimd) - command_arguments(HEADER_PARAMS_SVE 2 4 svfloat64_t svfloat32_t svint32_t svint32_t __ARM_FEATURE_SVE sve) - - command_arguments(ALIAS_PARAMS_ADVSIMD_DP 2 float64x2_t int32x2_t n advsimd) - command_arguments(ALIAS_PARAMS_ADVSIMD_SP -4 float32x4_t int32x4_t n advsimd) -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") - set(SLEEF_ARCH_AARCH32 ON CACHE INTERNAL "True for Aarch32 architecture.") - set(COMPILER_SUPPORTS_NEON32 1) - - set(SLEEF_HEADER_LIST - NEON32_ - NEON32 - ) - command_arguments(HEADER_PARAMS_NEON32_ 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__) - command_arguments(HEADER_PARAMS_NEON32 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__ neon) - - command_arguments(ALIAS_PARAMS_NEON32_SP -4 float32x4_t int32x4_t - neon) - command_arguments(ALIAS_PARAMS_NEON32_DP 0) -endif() - -# MKRename arguments per type -command_arguments(RENAME_PARAMS_SSE2 2 4 sse2) -command_arguments(RENAME_PARAMS_SSE4 2 4 sse4) -command_arguments(RENAME_PARAMS_AVX 4 8 avx) -command_arguments(RENAME_PARAMS_AVX2 4 8 avx2) -command_arguments(RENAME_PARAMS_AVX2128 2 4 avx2128) -command_arguments(RENAME_PARAMS_ADVSIMD 2 4 advsimd) -command_arguments(RENAME_PARAMS_NEON32 2 4 neon) -# The vector length parameters in SVE, for SP and DP, are chosen for -# the smallest SVE vector size (128-bit). The name is generated using -# the "x" token of VLA SVE vector functions. -command_arguments(RENAME_PARAMS_SVE 2 4 sve) - -command_arguments(RENAME_PARAMS_GNUABI_SSE2 sse2 b 2 4 _mm128d _mm128 _mm128i _mm128i __SSE2__) -command_arguments(RENAME_PARAMS_GNUABI_AVX avx c 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__) -command_arguments(RENAME_PARAMS_GNUABI_AVX2 avx2 d 4 8 __m256d __m256 __m128i __m256i __AVX2__) -command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON) -# The vector length parameters in SVE, for SP and DP, are chosen for -# the smallest SVE vector size (128-bit). The name is generated using -# the "x" token of VLA SVE vector functions. -command_arguments(RENAME_PARAMS_GNUABI_SVE sve s 2 4 svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE) - - -command_arguments(MKMASKED_PARAMS_GNUABI_SVE_dp sve s 2) -command_arguments(MKMASKED_PARAMS_GNUABI_SVE_sp sve s -4) - -# COMPILER DETECTION - -# Detect CLANG executable path (on both Windows and Linux/OSX) -if(NOT CLANG_EXE_PATH) - # If the current compiler used by CMAKE is already clang, use this one directly - if(CMAKE_C_COMPILER MATCHES "clang") - set(CLANG_EXE_PATH ${CMAKE_C_COMPILER}) - else() - # Else we may find clang on the path? - find_program(CLANG_EXE_PATH NAMES clang "clang-5.0" "clang-4.0" "clang-3.9") - endif() -endif() - -# Allow to define the Gcc/Clang here -# As we might compile the lib with MSVC, but generates bitcode with CLANG -# Intel vector extensions. -set(CLANG_FLAGS_ENABLE_SSE2 "-msse2") -set(CLANG_FLAGS_ENABLE_SSE4 "-msse4.1") -set(CLANG_FLAGS_ENABLE_AVX "-mavx") -set(CLANG_FLAGS_ENABLE_AVX2 "-mavx2;-mfma") -set(CLANG_FLAGS_ENABLE_AVX2128 "-mavx2;-mfma") -set(CLANG_FLAGS_ENABLE_NEON32 "--target=arm-linux-gnueabihf;-mcpu=cortex-a8") -# Arm AArch64 vector extensions. -set(CLANG_FLAGS_ENABLE_ADVSIMD "-march=armv8-a+simd") -set(CLANG_FLAGS_ENABLE_SVE "-march=armv8-a+sve") - -# All variables storing compiler flags should be prefixed with FLAGS_ -if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") - # Always compile sleef with -ffp-contract. - set(FLAGS_STRICTMATH "-ffp-contract=off") - set(FLAGS_FASTMATH "-ffast-math") - - # Without the options below, gcc generates calls to libm - set(FLAGS_NO_ERRNO "-fno-math-errno -fno-trapping-math") - - # Intel vector extensions. - foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS}) - set(FLAGS_ENABLE_${SIMD} ${CLANG_FLAGS_ENABLE_${SIMD}}) - endforeach() - - # Warning flags. - set(FLAGS_WALL "-Wall -Wno-unused -Wno-attributes -Wno-unused-result") - if(CMAKE_C_COMPILER_ID MATCHES "GNU") - # The following compiler option is needed to suppress the warning - # "AVX vector return without AVX enabled changes the ABI" at - # src/arch/helpervecext.h:88 - string(CONCAT FLAGS_WALL ${FLAGS_WALL} " -Wno-psabi") - set(FLAGS_ENABLE_NEON32 "-mfpu=neon") - endif(CMAKE_C_COMPILER_ID MATCHES "GNU") -elseif(MSVC) - # Intel vector extensions. - set(FLAGS_ENABLE_SSE2 /D__SSE2__) - set(FLAGS_ENABLE_SSE4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__) - set(FLAGS_ENABLE_AVX /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /arch:AVX) - set(FLAGS_ENABLE_AVX2 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) - set(FLAGS_ENABLE_AVX2128 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) - set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS") - set(FLAGS_NO_ERRNO "") -elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") - set(FLAGS_ENABLE_SSE2 "-msse2") - set(FLAGS_ENABLE_SSE4 "-msse4.1") - set(FLAGS_ENABLE_AVX "-mavx") - set(FLAGS_ENABLE_AVX2 "-march=core-avx2") - set(FLAGS_ENABLE_AVX2128 "-march=core-avx2") - set(FLAGS_STRICTMATH "-fp-model strict -Qoption,cpp,--extended_float_type -qoverride-limits") - set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type -qoverride-limits") - set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes") - set(FLAGS_NO_ERRNO "") -endif() - -set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}") -if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99) - set(DFT_C_FLAGS "${FLAGS_WALL}") -else() - set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_FASTMATH}") -endif() - -if(CYGWIN OR MINGW) - set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -fno-asynchronous-unwind-tables") - set(DFT_C_FLAGS "${DFT_C_FLAGS} -fno-asynchronous-unwind-tables") -endif() - -# FEATURE DETECTION - -CHECK_TYPE_SIZE("long double" LD_SIZE) -if(LD_SIZE GREATER "9") - # This is needed to check since internal compiler error occurs with gcc 4.x - CHECK_C_SOURCE_COMPILES(" - typedef long double vlongdouble __attribute__((vector_size(sizeof(long double)*2))); - vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d }; } - int main() { vlongdouble vld = vcast_vl_l(0); - }" COMPILER_SUPPORTS_LONG_DOUBLE) -endif() - -CHECK_C_SOURCE_COMPILES(" - int main() { __float128 r = 1; - }" COMPILER_SUPPORTS_FLOAT128) - -# Detect if sleef supported architectures are also supported by the compiler - -set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE2}) -CHECK_C_SOURCE_COMPILES(" - #if defined(_MSC_VER) - #include - #else - #include - #endif - int main() { - __m128d r = _mm_mul_pd(_mm_set1_pd(1), _mm_set1_pd(2)); }" - COMPILER_SUPPORTS_SSE2) - -set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE4}) -CHECK_C_SOURCE_COMPILES(" - #if defined(_MSC_VER) - #include - #else - #include - #endif - int main() { - __m128d r = _mm_floor_sd(_mm_set1_pd(1), _mm_set1_pd(2)); }" - COMPILER_SUPPORTS_SSE4) - -set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX}) -CHECK_C_SOURCE_COMPILES(" - #if defined(_MSC_VER) - #include - #else - #include - #endif - int main() { - __m256d r = _mm256_add_pd(_mm256_set1_pd(1), _mm256_set1_pd(2)); - }" COMPILER_SUPPORTS_AVX) - -set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX2}) -CHECK_C_SOURCE_COMPILES(" - #if defined(_MSC_VER) - #include - #else - #include - #endif - int main() { - __m256i r = _mm256_abs_epi32(_mm256_set1_epi32(1)); }" - COMPILER_SUPPORTS_AVX2) - -set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SVE}) -CHECK_C_SOURCE_COMPILES(" - #include - int main() { - svint32_t r = svdup_n_s32(1); }" - COMPILER_SUPPORTS_SVE) - -# AVX2 implies AVX2128 -if(COMPILER_SUPPORTS_AVX2) - set(COMPILER_SUPPORTS_AVX2128 1) -endif() - -# Check if compilation with OpenMP really succeeds -# It does not succeed on Travis even though find_package(OpenMP) succeeds. -find_package(OpenMP) -if(OPENMP_FOUND) - set (CMAKE_REQUIRED_FLAGS "${OpenMP_C_FLAGS}") - CHECK_C_SOURCE_COMPILES(" - #include - int main() { - int i; - #pragma omp parallel for - for(i=0;i < 10;i++) { putchar(0); } - }" - COMPILER_SUPPORTS_OPENMP) -endif(OPENMP_FOUND) - -# Check weak aliases are supported. -CHECK_C_SOURCE_COMPILES(" -#if defined(__CYGWIN__) -#define EXPORT __stdcall __declspec(dllexport) -#else -#define EXPORT -#endif - EXPORT int f(int a) { - return a + 2; - } - EXPORT int g(int a) __attribute__((weak, alias(\"f\"))); - int main(void) { - return g(2); - }" - COMPILER_SUPPORTS_WEAK_ALIASES) -if (COMPILER_SUPPORTS_WEAK_ALIASES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm" AND NOT MINGW) - set(ENABLE_GNUABI ${COMPILER_SUPPORTS_WEAK_ALIASES}) -endif() - -CHECK_C_SOURCE_COMPILES(" - int main(void) { - double a = __builtin_sqrt (2); - float b = __builtin_sqrtf(2); - }" - COMPILER_SUPPORTS_BUILTIN_MATH) - -# Reset used flags -set(CMAKE_REQUIRED_FLAGS) - -# Save the default C flags -set(ORG_CMAKE_C_FLAGS CMAKE_C_FLAGS) - -# Check if sde64 command is available - -find_program(SDE_COMMAND sde64) -if (NOT SDE_COMMAND) - find_program(SDE_COMMAND sde) -endif() - -# Check if armie command is available - -find_program(ARMIE_COMMAND armie) -if (NOT SVE_VECTOR_BITS) - set(SVE_VECTOR_BITS 128) -endif() -## - -if(SLEEF_SHOW_ERROR_LOG) - if (EXISTS ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log) - file(READ ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log FILE_CONTENT) - message("${FILE_CONTENT}") - endif() -endif(SLEEF_SHOW_ERROR_LOG) - -# Detect if cmake is running on Travis -string(COMPARE NOTEQUAL "" "$ENV{TRAVIS}" RUNNING_ON_TRAVIS) - -if (${RUNNING_ON_TRAVIS} AND CMAKE_C_COMPILER_ID MATCHES "Clang") - message("Travix bug workaround turned on") - set(COMPILER_SUPPORTS_OPENMP FALSE) # Workaround for https://github.com/travis-ci/travis-ci/issues/8613 - set(COMPILER_SUPPORTS_FLOAT128 FALSE) # Compilation on unroll_0_vecextqp.c does not finish on Travis -endif() - -# Set common definitions - -if (NOT BUILD_SHARED_LIBS) - set(COMMON_TARGET_DEFINITIONS SLEEF_STATIC_LIBS=1) -endif() - -if (COMPILER_SUPPORTS_WEAK_ALIASES) - set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_ALIAS=1) -endif() diff --git a/aten/src/ATen/cpu/vec256/vec256_double.h b/aten/src/ATen/cpu/vec256/vec256_double.h index 5823ea89a13..7831f2f9f2d 100644 --- a/aten/src/ATen/cpu/vec256/vec256_double.h +++ b/aten/src/ATen/cpu/vec256/vec256_double.h @@ -2,7 +2,6 @@ #include "intrinsics.h" #include "vec256_base.h" -#include namespace at { namespace vec256 { @@ -55,36 +54,6 @@ public: auto mask = _mm256_set1_pd(-0.f); return _mm256_andnot_pd(mask, values); } - Vec256 acos() const { - return Vec256(Sleef_acosd4_u10(values)); - } - Vec256 asin() const { - return Vec256(Sleef_asind4_u10(values)); - } - Vec256 atan() const { - return Vec256(Sleef_atand4_u10(values)); - } - Vec256 erf() const { - return Vec256(Sleef_erfd4_u10(values)); - } - Vec256 exp() const { - return Vec256(Sleef_expd4_u10(values)); - } - Vec256 expm1() const { - return Vec256(Sleef_expm1d4_u10(values)); - } - Vec256 log() const { - return Vec256(Sleef_logd4_u10(values)); - } - Vec256 log2() const { - return Vec256(Sleef_log2d4_u10(values)); - } - Vec256 log10() const { - return Vec256(Sleef_log10d4_u10(values)); - } - Vec256 log1p() const { - return Vec256(Sleef_log1pd4_u10(values)); - } Vec256 sin() const { return map(std::sin); } diff --git a/aten/src/ATen/cpu/vec256/vec256_float.h b/aten/src/ATen/cpu/vec256/vec256_float.h index 3cef0d47973..4d75cec4a31 100644 --- a/aten/src/ATen/cpu/vec256/vec256_float.h +++ b/aten/src/ATen/cpu/vec256/vec256_float.h @@ -2,7 +2,6 @@ #include "intrinsics.h" #include "vec256_base.h" -#include #include namespace at { @@ -56,36 +55,6 @@ public: auto mask = _mm256_set1_ps(-0.f); return _mm256_andnot_ps(mask, values); } - Vec256 acos() const { - return Vec256(Sleef_acosf8_u10(values)); - } - Vec256 asin() const { - return Vec256(Sleef_asinf8_u10(values)); - } - Vec256 atan() const { - return Vec256(Sleef_atanf8_u10(values)); - } - Vec256 erf() const { - return Vec256(Sleef_erff8_u10(values)); - } - Vec256 exp() const { - return Vec256(Sleef_expf8_u10(values)); - } - Vec256 expm1() const { - return Vec256(Sleef_expm1f8_u10(values)); - } - Vec256 log() const { - return Vec256(Sleef_logf8_u10(values)); - } - Vec256 log2() const { - return Vec256(Sleef_log2f8_u10(values)); - } - Vec256 log10() const { - return Vec256(Sleef_log10f8_u10(values)); - } - Vec256 log1p() const { - return Vec256(Sleef_log1pf8_u10(values)); - } Vec256 sin() const { return map(std::sin); } diff --git a/aten/src/ATen/native/UnaryOps.cpp b/aten/src/ATen/native/UnaryOps.cpp index 83166e8adcc..4ba12b79634 100644 --- a/aten/src/ATen/native/UnaryOps.cpp +++ b/aten/src/ATen/native/UnaryOps.cpp @@ -6,7 +6,6 @@ #include "ATen/CPUApplyUtils.h" #include "ATen/Parallel.h" -#include "ATen/native/cpu/UnaryOpsKernel.h" #include #include @@ -66,36 +65,6 @@ Tensor& fill_(Tensor& self, const Tensor& value) { return result; \ } -#define IMPLEMENT_UNARY_OP_VEC(op, opfn) \ - Tensor& _##op##__cpu(Tensor& self_) { \ - if (self_.numel() > 0) { \ - Tensor self = sort_strides(self_); \ - if (self.is_contiguous()) { \ - op##Impl(self, self); \ - } else { \ - AT_DISPATCH_FLOATING_TYPES(self.type(), op, [&] { \ - CPU_tensor_parallel_apply1( \ - self, [](scalar_t& y) { y = opfn(y); }); \ - }); \ - } \ - } \ - return self_; \ - } \ - Tensor& _##op##_out_cpu(Tensor& result, const Tensor& self) { \ - result.resize_(self.sizes()); \ - if (result.numel() > 0) { \ - if (result.is_contiguous() && self.is_contiguous()) { \ - op##Impl(result, self); \ - } else { \ - AT_DISPATCH_FLOATING_TYPES(self.type(), op, [&] { \ - CPU_tensor_parallel_apply2( \ - result, self, [](scalar_t& y, scalar_t& x) { y = opfn(x); }); \ - }); \ - } \ - } \ - return result; \ - } - IMPLEMENT_UNARY_OP_PREQUEL(abs) IMPLEMENT_UNARY_OP_PREQUEL(acos) IMPLEMENT_UNARY_OP_PREQUEL(asin) @@ -130,28 +99,48 @@ Tensor& _tanh_out_cuda(Tensor& result, const Tensor& self) { return at::_th_tanh_out(result, self); } -IMPLEMENT_UNARY_OP_VEC(abs, std::abs) -IMPLEMENT_UNARY_OP_VEC(acos, std::acos) -IMPLEMENT_UNARY_OP_VEC(asin, std::asin) -IMPLEMENT_UNARY_OP_VEC(atan, std::atan) -IMPLEMENT_UNARY_OP_VEC(ceil, std::ceil) +Tensor& _abs__cpu(Tensor& self_) { + if (self_.numel() > 0) { + Tensor self = sort_strides(self_); + AT_DISPATCH_ALL_TYPES(self.type(), abs, [&] { + CPU_tensor_parallel_apply1( + self, [](scalar_t& y) { y = std::abs(y); }); + }); + } + return self_; +} +Tensor& _abs_out_cpu(Tensor& result, const Tensor& self) { + result.resize_(self.sizes()); + if (result.numel() > 0) { + AT_DISPATCH_ALL_TYPES(self.type(), abs, [&] { + CPU_tensor_parallel_apply2( + result, self, [](scalar_t& y, scalar_t& x) { y = std::abs(x); }); + }); + } + return result; +} + +IMPLEMENT_UNARY_OP_FLOAT_CMATH(acos, std::acos) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(asin, std::asin) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(atan, std::atan) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(ceil, std::ceil) IMPLEMENT_UNARY_OP_FLOAT_CMATH(cos, std::cos) IMPLEMENT_UNARY_OP_FLOAT_CMATH(cosh, std::cosh) -IMPLEMENT_UNARY_OP_VEC(erf, std::erf) -IMPLEMENT_UNARY_OP_VEC(exp, std::exp) -IMPLEMENT_UNARY_OP_VEC(expm1, std::expm1) -IMPLEMENT_UNARY_OP_VEC(floor, std::floor) -IMPLEMENT_UNARY_OP_VEC(log, std::log) -IMPLEMENT_UNARY_OP_VEC(log10, std::log10) -IMPLEMENT_UNARY_OP_VEC(log1p, std::log1p) -IMPLEMENT_UNARY_OP_VEC(log2, std::log2) -IMPLEMENT_UNARY_OP_VEC(round, std::round) -IMPLEMENT_UNARY_OP_VEC(rsqrt, 1 / std::sqrt) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(erf, std::erf) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(exp, std::exp) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(expm1, std::expm1) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(floor, std::floor) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(log, std::log) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(log10, std::log10) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(log1p, std::log1p) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(log2, std::log2) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(round, std::round) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(rsqrt, 1 / std::sqrt) IMPLEMENT_UNARY_OP_FLOAT_CMATH(sin, std::sin) IMPLEMENT_UNARY_OP_FLOAT_CMATH(sinh, std::sinh) -IMPLEMENT_UNARY_OP_VEC(sqrt, std::sqrt) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(sqrt, std::sqrt) IMPLEMENT_UNARY_OP_FLOAT_CMATH(tan, std::tan) IMPLEMENT_UNARY_OP_FLOAT_CMATH(tanh, std::tanh) -IMPLEMENT_UNARY_OP_VEC(trunc, std::trunc) +IMPLEMENT_UNARY_OP_FLOAT_CMATH(trunc, std::trunc) } } // namespace at diff --git a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp b/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp deleted file mode 100644 index 0e617a96a01..00000000000 --- a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include "ATen/native/cpu/UnaryOpsKernel.h" - -#include -#include -#include "ATen/Dispatch.h" -#include "ATen/Parallel.h" -#include "ATen/cpu/vec256/vec256.h" -#include "ATen/native/cpu/CapabilityDispatch.h" - -namespace at { namespace native { -namespace { - -using namespace vec256; - -template -static void -unary_kernel(scalar_t* arr_out, const scalar_t* arr_in, int64_t size, F func) { - using Vec = Vec256; - int64_t size_rounded = size - (size % Vec::size); - int64_t k = 0; - for (; k != size_rounded; k += Vec::size) { - auto value = func(Vec::s_load(arr_in + k)); - value.store(arr_out + k); - } - auto leftover = size - k; - if (leftover > 0) { - Vec a; - a.load_partial(arr_in + k, leftover); - func(a).store_partial(arr_out + k, leftover); - } -} - -template -static void parallel_apply(Tensor& result, const Tensor& self, F f) { - internal::init_tbb_num_threads(); - - static tbb::affinity_partitioner ap; - - auto arr_out = result.data(); - auto arr_in = self.data(); - int64_t size = self.numel(); - if (size < internal::TBB_GRAIN_SIZE) { - unary_kernel(arr_out, arr_in, size, f); - } else { - tbb::parallel_for( - tbb::blocked_range(0, size, internal::TBB_GRAIN_SIZE), - [&](const tbb::blocked_range& r) { - auto size = r.end() - r.begin(); - unary_kernel(arr_out + r.begin(), arr_in + r.begin(), size, f); - }, - ap); - } -} - -static void abs_kernel(Tensor& result, const Tensor& self) { - AT_DISPATCH_ALL_TYPES(self.type(), "abs", [&] { - parallel_apply( - result, - self, - [](const Vec256& x) { return x.abs(); }); }); -} - -static void rsqrt_kernel(Tensor& result, const Tensor& self) { - AT_DISPATCH_FLOATING_TYPES(self.type(), "rsqrt", [&] { - parallel_apply( - result, - self, - [](const Vec256& x) { return Vec256((scalar_t)(1)) / x.sqrt(); }); }); -} - -#define IMPLEMENT_FLOAT_KERNEL(op) \ - static void op##_kernel(Tensor& result, const Tensor& self) { \ - AT_DISPATCH_FLOATING_TYPES(self.type(), #op, [&] { \ - parallel_apply( \ - result, self, [](const Vec256& x) { return x.op(); }); \ - }); \ - } \ - REGISTER_DISPATCH(op##Impl, &op##_kernel) - -} // anonymous namespace - - -REGISTER_DISPATCH(absImpl, &abs_kernel); -REGISTER_DISPATCH(rsqrtImpl, &rsqrt_kernel); - -IMPLEMENT_FLOAT_KERNEL(acos) -IMPLEMENT_FLOAT_KERNEL(asin) -IMPLEMENT_FLOAT_KERNEL(atan) -IMPLEMENT_FLOAT_KERNEL(erf) -IMPLEMENT_FLOAT_KERNEL(exp) -IMPLEMENT_FLOAT_KERNEL(expm1) -IMPLEMENT_FLOAT_KERNEL(log) -IMPLEMENT_FLOAT_KERNEL(log10) -IMPLEMENT_FLOAT_KERNEL(log1p) -IMPLEMENT_FLOAT_KERNEL(log2) -IMPLEMENT_FLOAT_KERNEL(ceil) -IMPLEMENT_FLOAT_KERNEL(floor) -IMPLEMENT_FLOAT_KERNEL(round) -IMPLEMENT_FLOAT_KERNEL(sqrt) -IMPLEMENT_FLOAT_KERNEL(trunc) - -}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/UnaryOpsKernel.h b/aten/src/ATen/native/cpu/UnaryOpsKernel.h deleted file mode 100644 index f5998881889..00000000000 --- a/aten/src/ATen/native/cpu/UnaryOpsKernel.h +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include -#include -#include "CapabilityDispatch.h" - -namespace at { namespace native { - -using unary_fn = void(*)(Tensor&, const Tensor&); - -extern DispatchStub absImpl; -extern DispatchStub acosImpl; -extern DispatchStub asinImpl; -extern DispatchStub atanImpl; -extern DispatchStub ceilImpl; -extern DispatchStub erfImpl; -extern DispatchStub expImpl; -extern DispatchStub expm1Impl; -extern DispatchStub fracImpl; -extern DispatchStub floorImpl; -extern DispatchStub logImpl; -extern DispatchStub log10Impl; -extern DispatchStub log1pImpl; -extern DispatchStub log2Impl; -extern DispatchStub roundImpl; -extern DispatchStub rsqrtImpl; -extern DispatchStub sqrtImpl; -extern DispatchStub truncImpl; - - -// Missing unary functions -// digamma -// lgamma - -// TODO: See below -// erfinv -// fill -// frac -// clone -// contiguous -// clamp/_min/_max -// neg -// reciprocal -// sigmoid -// sign -// zero - - -}} // namespace at::native diff --git a/third_party/sleef b/third_party/sleef deleted file mode 160000 index e4217b4fdcf..00000000000 --- a/third_party/sleef +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e4217b4fdcfc47b0b073d490c0ddeef5f0eb5fc9