Revert "Add scalar conversion using avx instructions for half (#102140)"

This reverts commit 1d6a446567. Reverted https://github.com/pytorch/pytorch/pull/102140 on behalf of https://github.com/ZainRizvi because it is still breaking internal builds; specifically, it breaks the dynamo test test_repros.py::DynamicShapesReproTests::test_odict_get_item_index_name ([comment](https://github.com/pytorch/pytorch/pull/102140#issuecomment-1686684117)).
2026-05-14 20:57:59 +00:00 · 2023-08-21 16:51:50 +00:00 · 2023-08-21 16:51:50 +00:00 · e0f1fe102a
commit e0f1fe102a
parent df16b1ed53
3 changed files with 0 additions and 82 deletions
--- a/aten/src/ATen/cpu/vec/vec_half.h
+++ b/aten/src/ATen/cpu/vec/vec_half.h
@ -1,44 +0,0 @@
-#pragma once
-
-#include <ATen/cpu/vec/intrinsics.h>
-
-namespace at {
-namespace vec {
-// See Note [CPU_CAPABILITY namespace]
-inline namespace CPU_CAPABILITY {
-
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-inline uint16_t float2half_scalar(float val) {
-#if defined(CPU_CAPABILITY_AVX2)
-  __m256 v = _mm256_set1_ps(val);
-  __m128i o =
-      _mm256_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
-  return static_cast<std::uint16_t>(_mm_cvtsi128_si32(o));
-#elif defined(CPU_CAPABILITY_AVX512)
-  __m512 v = _mm512_set1_ps(val);
-  __m256i o =
-      _mm512_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
-  return static_cast<std::uint16_t>(
-      _mm_cvtsi128_si32(_mm256_castsi256_si128(o)));
-#endif
-}
-
-inline float half2float_scalar(uint16_t val) {
-#if defined(CPU_CAPABILITY_AVX2)
-  __m128i v = _mm_cvtsi32_si128(val);
-  __m256 o = _mm256_cvtph_ps(v);
-  return _mm256_cvtss_f32(o);
-#elif defined(CPU_CAPABILITY_AVX512)
-  __m256i v =
-      _mm256_setr_epi16(val, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-  __m512 o = _mm512_cvtph_ps(v);
-  return _mm512_cvtss_f32(o);
-#endif
-}
-
-#endif
-
-} // namespace CPU_CAPABILITY
-} // namespace vec
-} // namespace at
--- a/aten/src/ATen/test/vec_test_all_types.cpp
+++ b/aten/src/ATen/test/vec_test_all_types.cpp
@ -1569,29 +1569,6 @@ namespace {
         }
      }
    }
-    TEST(HalfConversionTest, HalfFloat) {
-      float f32s[100];
-      for (const auto i : c10::irange(100)) {
-        f32s[i] = i + 0.3;
-      }
-      uint16_t u16;
-      float x;
-      for (const auto i : c10::irange(100)) {
-      #if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-          !defined(__APPLE__)
-        u16 = at::vec::float2half_scalar(f32s[i]);
-        x = at::vec::half2float_scalar(u16);
-      #else
-        u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
-        x = c10::detail::fp16_ieee_to_fp32_value(u16);
-      #endif
-
-        EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i]))
-            << "Test failed for float to uint16 " << f32s[i] << "\n";
-        EXPECT_EQ(x, c10::detail::fp16_ieee_to_fp32_value(u16))
-            << "Test failed for uint16 to float " << u16 << "\n";
-      }
-    }

 #else
 #error GTEST does not have TYPED_TEST
--- a/c10/util/Half-inl.h
+++ b/c10/util/Half-inl.h
@ -20,11 +20,6 @@
 #include <sycl/sycl.hpp> // for SYCL 2020
 #endif

-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-#include <ATen/cpu/vec/vec_half.h>
-#endif
-
 C10_CLANG_DIAGNOSTIC_PUSH()
 #if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion")
 C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
@ -40,14 +35,9 @@ inline C10_HOST_DEVICE Half::Half(float value)
      x(__half_as_short(__float2half(value)))
 #elif defined(__SYCL_DEVICE_ONLY__)
      x(c10::bit_cast<uint16_t>(sycl::half(value)))
-#else
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-      x(at::vec::float2half_scalar(value))
 #else
      x(detail::fp16_ieee_from_fp32_value(value))
 #endif
-#endif
 {
 }

@ -58,14 +48,9 @@ inline C10_HOST_DEVICE Half::operator float() const {
  return __half2float(*reinterpret_cast<const __half*>(&x));
 #elif defined(__SYCL_DEVICE_ONLY__)
  return float(c10::bit_cast<sycl::half>(x));
-#else
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-  return at::vec::half2float_scalar(x);
 #else
  return detail::fp16_ieee_to_fp32_value(x);
 #endif
-#endif
 }

 #if defined(__CUDACC__) || defined(__HIPCC__)