Revert "Add scalar conversion using avx instructions for half (#102140)"

This reverts commit 1d6a446567.

Reverted https://github.com/pytorch/pytorch/pull/102140 on behalf of https://github.com/ZainRizvi because it is still breaking internal builds; specifically, it breaks the dynamo test test_repros.py::DynamicShapesReproTests::test_odict_get_item_index_name ([comment](https://github.com/pytorch/pytorch/pull/102140#issuecomment-1686684117)).
This commit is contained in:
PyTorch MergeBot 2023-08-21 16:51:50 +00:00
parent df16b1ed53
commit e0f1fe102a
3 changed files with 0 additions and 82 deletions

View file

@ -1,44 +0,0 @@
#pragma once
#include <ATen/cpu/vec/intrinsics.h>
namespace at {
namespace vec {
// See Note [CPU_CAPABILITY namespace]
inline namespace CPU_CAPABILITY {
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
// Converts one float to the 16-bit pattern of a half-precision value using
// the vector float->half conversion intrinsics.
//
// The scalar is broadcast to every lane of a vector register, the whole
// register is converted with round-to-nearest and floating-point exceptions
// suppressed, and only the lowest 16 bits of the result are returned.
//
// NOTE(review): the body is empty unless CPU_CAPABILITY_AVX2 or
// CPU_CAPABILITY_AVX512 is defined, so this must only be compiled when one of
// those macros is set.
inline uint16_t float2half_scalar(float val) {
#if defined(CPU_CAPABILITY_AVX2)
  // 8 identical float lanes -> 8 identical half lanes packed in 128 bits.
  const __m256 broadcast = _mm256_set1_ps(val);
  const __m128i halves = _mm256_cvtps_ph(
      broadcast, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  // The low 32 bits hold two copies of the result; the cast keeps one.
  const int low_lanes = _mm_cvtsi128_si32(halves);
  return static_cast<std::uint16_t>(low_lanes);
#elif defined(CPU_CAPABILITY_AVX512)
  // 16 identical float lanes -> 16 identical half lanes packed in 256 bits.
  const __m512 broadcast = _mm512_set1_ps(val);
  const __m256i halves = _mm512_cvtps_ph(
      broadcast, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  // Narrow to the low 128 bits, then read the low 32 bits as an int.
  const int low_lanes = _mm_cvtsi128_si32(_mm256_castsi256_si128(halves));
  return static_cast<std::uint16_t>(low_lanes);
#endif
}
// Converts the 16-bit pattern of a half-precision value to a float using the
// vector half->float conversion intrinsics.
//
// The input is placed in the lowest element of an otherwise zero integer
// vector, the whole vector is widened to floats, and the lowest float lane is
// returned.
//
// NOTE(review): the body is empty unless CPU_CAPABILITY_AVX2 or
// CPU_CAPABILITY_AVX512 is defined, so this must only be compiled when one of
// those macros is set.
inline float half2float_scalar(uint16_t val) {
#if defined(CPU_CAPABILITY_AVX2)
  // val occupies the low bits of the 128-bit register; the rest is zero.
  const __m128i half_bits = _mm_cvtsi32_si128(val);
  return _mm256_cvtss_f32(_mm256_cvtph_ps(half_bits));
#elif defined(CPU_CAPABILITY_AVX512)
  // val is element 0 of sixteen 16-bit elements; the other fifteen are zero.
  const __m256i half_bits =
      _mm256_setr_epi16(val, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  return _mm512_cvtss_f32(_mm512_cvtph_ps(half_bits));
#endif
}
#endif
} // namespace CPU_CAPABILITY
} // namespace vec
} // namespace at

View file

@ -1569,29 +1569,6 @@ namespace {
}
}
}
// Checks float -> half -> float conversion for the 100 inputs i + 0.3,
// i in [0, 100).
//
// On AVX2/AVX512 builds (excluding Apple) the at::vec scalar helpers produce
// the values under test; on every other build the c10::detail reference
// routines are used directly. In both cases the results are compared against
// the c10::detail reference routines.
TEST(HalfConversionTest, HalfFloat) {
  for (const auto idx : c10::irange(100)) {
    // Computed in double and then narrowed to float on initialization.
    const float input = idx + 0.3;
    uint16_t half_bits;
    float round_trip;
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
    !defined(__APPLE__)
    half_bits = at::vec::float2half_scalar(input);
    round_trip = at::vec::half2float_scalar(half_bits);
#else
    half_bits = c10::detail::fp16_ieee_from_fp32_value(input);
    round_trip = c10::detail::fp16_ieee_to_fp32_value(half_bits);
#endif
    EXPECT_EQ(half_bits, c10::detail::fp16_ieee_from_fp32_value(input))
        << "Test failed for float to uint16 " << input << "\n";
    EXPECT_EQ(round_trip, c10::detail::fp16_ieee_to_fp32_value(half_bits))
        << "Test failed for uint16 to float " << half_bits << "\n";
  }
}
#else
#error GTEST does not have TYPED_TEST

View file

@ -20,11 +20,6 @@
#include <sycl/sycl.hpp> // for SYCL 2020
#endif
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
#include <ATen/cpu/vec/vec_half.h>
#endif
C10_CLANG_DIAGNOSTIC_PUSH()
#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion")
C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
@ -40,14 +35,9 @@ inline C10_HOST_DEVICE Half::Half(float value)
x(__half_as_short(__float2half(value)))
#elif defined(__SYCL_DEVICE_ONLY__)
x(c10::bit_cast<uint16_t>(sycl::half(value)))
#else
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
x(at::vec::float2half_scalar(value))
#else
x(detail::fp16_ieee_from_fp32_value(value))
#endif
#endif
{
}
@ -58,14 +48,9 @@ inline C10_HOST_DEVICE Half::operator float() const {
return __half2float(*reinterpret_cast<const __half*>(&x));
#elif defined(__SYCL_DEVICE_ONLY__)
return float(c10::bit_cast<sycl::half>(x));
#else
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
return at::vec::half2float_scalar(x);
#else
return detail::fp16_ieee_to_fp32_value(x);
#endif
#endif
}
#if defined(__CUDACC__) || defined(__HIPCC__)