From e0f1fe102ac85bcf3b7b01e50fa56df1bcbbfef1 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Mon, 21 Aug 2023 16:51:50 +0000 Subject: [PATCH] Revert "Add scalar conversion using avx instructions for half (#102140)" This reverts commit 1d6a44656755c89f4f9a878865dcb0ac39af9a74. Reverted https://github.com/pytorch/pytorch/pull/102140 on behalf of https://github.com/ZainRizvi due to Sorry, this is still breaking internal builds. Specifically, the dynamo test test_repros.py::DynamicShapesReproTests::test_odict_get_item_index_name ([comment](https://github.com/pytorch/pytorch/pull/102140#issuecomment-1686684117)) --- aten/src/ATen/cpu/vec/vec_half.h | 44 ----------------------- aten/src/ATen/test/vec_test_all_types.cpp | 23 ------------ c10/util/Half-inl.h | 15 -------- 3 files changed, 82 deletions(-) delete mode 100644 aten/src/ATen/cpu/vec/vec_half.h diff --git a/aten/src/ATen/cpu/vec/vec_half.h b/aten/src/ATen/cpu/vec/vec_half.h deleted file mode 100644 index ca6afdd2ca8..00000000000 --- a/aten/src/ATen/cpu/vec/vec_half.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include - -namespace at { -namespace vec { -// See Note [CPU_CAPABILITY namespace] -inline namespace CPU_CAPABILITY { - -#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ - !defined(__APPLE__) -inline uint16_t float2half_scalar(float val) { -#if defined(CPU_CAPABILITY_AVX2) - __m256 v = _mm256_set1_ps(val); - __m128i o = - _mm256_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); - return static_cast(_mm_cvtsi128_si32(o)); -#elif defined(CPU_CAPABILITY_AVX512) - __m512 v = _mm512_set1_ps(val); - __m256i o = - _mm512_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); - return static_cast( - _mm_cvtsi128_si32(_mm256_castsi256_si128(o))); -#endif -} - -inline float half2float_scalar(uint16_t val) { -#if defined(CPU_CAPABILITY_AVX2) - __m128i v = _mm_cvtsi32_si128(val); - __m256 o = _mm256_cvtph_ps(v); - return _mm256_cvtss_f32(o); -#elif defined(CPU_CAPABILITY_AVX512) - __m256i v = - _mm256_setr_epi16(val, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - __m512 o = _mm512_cvtph_ps(v); - return _mm512_cvtss_f32(o); -#endif -} - -#endif - -} // namespace CPU_CAPABILITY -} // namespace vec -} // namespace at diff --git a/aten/src/ATen/test/vec_test_all_types.cpp b/aten/src/ATen/test/vec_test_all_types.cpp index b990a9bd171..b9a754cde35 100644 --- a/aten/src/ATen/test/vec_test_all_types.cpp +++ b/aten/src/ATen/test/vec_test_all_types.cpp @@ -1569,29 +1569,6 @@ namespace { } } } - TEST(HalfConversionTest, HalfFloat) { - float f32s[100]; - for (const auto i : c10::irange(100)) { - f32s[i] = i + 0.3; - } - uint16_t u16; - float x; - for (const auto i : c10::irange(100)) { - #if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ - !defined(__APPLE__) - u16 = at::vec::float2half_scalar(f32s[i]); - x = at::vec::half2float_scalar(u16); - #else - u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]); - x = c10::detail::fp16_ieee_to_fp32_value(u16); - #endif - - EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i])) - << "Test failed for float to uint16 " << f32s[i] << "\n"; - EXPECT_EQ(x, c10::detail::fp16_ieee_to_fp32_value(u16)) - << "Test failed for uint16 to float " << u16 << "\n"; - } - } #else #error GTEST does not have TYPED_TEST diff --git a/c10/util/Half-inl.h b/c10/util/Half-inl.h index d2c836eecf7..daac07c6559 100644 --- a/c10/util/Half-inl.h +++ b/c10/util/Half-inl.h @@ -20,11 +20,6 @@ #include // for SYCL 2020 #endif -#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ - !defined(__APPLE__) -#include -#endif - C10_CLANG_DIAGNOSTIC_PUSH() #if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion") C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") @@ -40,14 +35,9 @@ inline C10_HOST_DEVICE Half::Half(float value) x(__half_as_short(__float2half(value))) #elif defined(__SYCL_DEVICE_ONLY__) x(c10::bit_cast(sycl::half(value))) -#else -#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ - !defined(__APPLE__) - x(at::vec::float2half_scalar(value)) #else x(detail::fp16_ieee_from_fp32_value(value)) #endif -#endif { } @@ -58,14 +48,9 @@ inline C10_HOST_DEVICE Half::operator float() const { return __half2float(*reinterpret_cast(&x)); #elif defined(__SYCL_DEVICE_ONLY__) return float(c10::bit_cast(x)); -#else -#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \ - !defined(__APPLE__) - return at::vec::half2float_scalar(x); #else return detail::fp16_ieee_to_fp32_value(x); #endif -#endif } #if defined(__CUDACC__) || defined(__HIPCC__)