From e0f1fe102ac85bcf3b7b01e50fa56df1bcbbfef1 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
Date: Mon, 21 Aug 2023 16:51:50 +0000
Subject: [PATCH] Revert "Add scalar conversion using avx instructions for half
 (#102140)"

This reverts commit 1d6a44656755c89f4f9a878865dcb0ac39af9a74.

Reverted https://github.com/pytorch/pytorch/pull/102140 on behalf of https://github.com/ZainRizvi because it is still breaking internal builds. Specifically, it breaks the dynamo test test_repros.py::DynamicShapesReproTests::test_odict_get_item_index_name ([comment](https://github.com/pytorch/pytorch/pull/102140#issuecomment-1686684117)).
---
 aten/src/ATen/cpu/vec/vec_half.h          | 44 -----------------------
 aten/src/ATen/test/vec_test_all_types.cpp | 23 ------------
 c10/util/Half-inl.h                       | 15 --------
 3 files changed, 82 deletions(-)
 delete mode 100644 aten/src/ATen/cpu/vec/vec_half.h

diff --git a/aten/src/ATen/cpu/vec/vec_half.h b/aten/src/ATen/cpu/vec/vec_half.h
deleted file mode 100644
index ca6afdd2ca8..00000000000
--- a/aten/src/ATen/cpu/vec/vec_half.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#pragma once
-
-#include <ATen/cpu/vec/intrinsics.h>
-
-namespace at {
-namespace vec {
-// See Note [CPU_CAPABILITY namespace]
-inline namespace CPU_CAPABILITY {
-
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-inline uint16_t float2half_scalar(float val) {
-#if defined(CPU_CAPABILITY_AVX2)
-  __m256 v = _mm256_set1_ps(val);
-  __m128i o =
-      _mm256_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
-  return static_cast<std::uint16_t>(_mm_cvtsi128_si32(o));
-#elif defined(CPU_CAPABILITY_AVX512)
-  __m512 v = _mm512_set1_ps(val);
-  __m256i o =
-      _mm512_cvtps_ph(v, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
-  return static_cast<std::uint16_t>(
-      _mm_cvtsi128_si32(_mm256_castsi256_si128(o)));
-#endif
-}
-
-inline float half2float_scalar(uint16_t val) {
-#if defined(CPU_CAPABILITY_AVX2)
-  __m128i v = _mm_cvtsi32_si128(val);
-  __m256 o = _mm256_cvtph_ps(v);
-  return _mm256_cvtss_f32(o);
-#elif defined(CPU_CAPABILITY_AVX512)
-  __m256i v =
-      _mm256_setr_epi16(val, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-  __m512 o = _mm512_cvtph_ps(v);
-  return _mm512_cvtss_f32(o);
-#endif
-}
-
-#endif
-
-} // namespace CPU_CAPABILITY
-} // namespace vec
-} // namespace at
diff --git a/aten/src/ATen/test/vec_test_all_types.cpp b/aten/src/ATen/test/vec_test_all_types.cpp
index b990a9bd171..b9a754cde35 100644
--- a/aten/src/ATen/test/vec_test_all_types.cpp
+++ b/aten/src/ATen/test/vec_test_all_types.cpp
@@ -1569,29 +1569,6 @@ namespace {
          }
       }
     }
-    TEST(HalfConversionTest, HalfFloat) {
-      float f32s[100];
-      for (const auto i : c10::irange(100)) {
-        f32s[i] = i + 0.3;
-      }
-      uint16_t u16;
-      float x;
-      for (const auto i : c10::irange(100)) {
-      #if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-          !defined(__APPLE__)
-        u16 = at::vec::float2half_scalar(f32s[i]);
-        x = at::vec::half2float_scalar(u16);
-      #else
-        u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
-        x = c10::detail::fp16_ieee_to_fp32_value(u16);
-      #endif
-
-        EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i]))
-            << "Test failed for float to uint16 " << f32s[i] << "\n";
-        EXPECT_EQ(x, c10::detail::fp16_ieee_to_fp32_value(u16))
-            << "Test failed for uint16 to float " << u16 << "\n";
-      }
-    }
 
 #else
 #error GTEST does not have TYPED_TEST
diff --git a/c10/util/Half-inl.h b/c10/util/Half-inl.h
index d2c836eecf7..daac07c6559 100644
--- a/c10/util/Half-inl.h
+++ b/c10/util/Half-inl.h
@@ -20,11 +20,6 @@
 #include <sycl/sycl.hpp> // for SYCL 2020
 #endif
 
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-#include <ATen/cpu/vec/vec_half.h>
-#endif
-
 C10_CLANG_DIAGNOSTIC_PUSH()
 #if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion")
 C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
@@ -40,14 +35,9 @@ inline C10_HOST_DEVICE Half::Half(float value)
       x(__half_as_short(__float2half(value)))
 #elif defined(__SYCL_DEVICE_ONLY__)
       x(c10::bit_cast<uint16_t>(sycl::half(value)))
-#else
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-      x(at::vec::float2half_scalar(value))
 #else
       x(detail::fp16_ieee_from_fp32_value(value))
 #endif
-#endif
 {
 }
 
@@ -58,14 +48,9 @@ inline C10_HOST_DEVICE Half::operator float() const {
   return __half2float(*reinterpret_cast<const __half*>(&x));
 #elif defined(__SYCL_DEVICE_ONLY__)
   return float(c10::bit_cast<sycl::half>(x));
-#else
-#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
-    !defined(__APPLE__)
-  return at::vec::half2float_scalar(x);
 #else
   return detail::fp16_ieee_to_fp32_value(x);
 #endif
-#endif
 }
 
 #if defined(__CUDACC__) || defined(__HIPCC__)