mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Revert "Add scalar conversion using avx instructions for half (#102140)"
This reverts commit 1d6a446567.
Reverted https://github.com/pytorch/pytorch/pull/102140 on behalf of https://github.com/ZainRizvi due to Sorry, this is still breaking internal builds. Specifically, the dynamo test test_repros.py::DynamicShapesReproTests::test_odict_get_item_index_name ([comment](https://github.com/pytorch/pytorch/pull/102140#issuecomment-1686684117))
This commit is contained in:
parent
df16b1ed53
commit
e0f1fe102a
3 changed files with 0 additions and 82 deletions
|
|
@ -1,44 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <ATen/cpu/vec/intrinsics.h>
|
||||
|
||||
namespace at {
|
||||
namespace vec {
|
||||
// See Note [CPU_CAPABILITY namespace]
|
||||
inline namespace CPU_CAPABILITY {
|
||||
|
||||
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
// Converts a single fp32 value into its 16-bit half-precision bit pattern
// using the vector float->half conversion instruction of the active
// CPU_CAPABILITY. The conversion is requested with the
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC rounding control.
//
// The scalar is broadcast to every lane, the whole register is converted,
// and only the lowest 16 bits of the result are returned.
inline uint16_t float2half_scalar(float val) {
#if defined(CPU_CAPABILITY_AVX2)
  const __m128i halves = _mm256_cvtps_ph(
      _mm256_set1_ps(val), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  // Pull out the low 32 bits; the narrowing cast below keeps the low 16.
  const int low_bits = _mm_cvtsi128_si32(halves);
  return static_cast<std::uint16_t>(low_bits);
#elif defined(CPU_CAPABILITY_AVX512)
  const __m256i halves = _mm512_cvtps_ph(
      _mm512_set1_ps(val), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  // Only the low 128-bit half of the result is needed to reach lane 0.
  const int low_bits = _mm_cvtsi128_si32(_mm256_castsi256_si128(halves));
  return static_cast<std::uint16_t>(low_bits);
#endif
}
|
||||
|
||||
// Converts a 16-bit half-precision bit pattern into an fp32 value using the
// vector half->float conversion instruction of the active CPU_CAPABILITY.
//
// The input bits are placed in the lowest lane of an integer register, the
// register is widened to fp32, and the lowest fp32 lane is returned.
inline float half2float_scalar(uint16_t val) {
#if defined(CPU_CAPABILITY_AVX2)
  const __m256 widened = _mm256_cvtph_ps(_mm_cvtsi32_si128(val));
  return _mm256_cvtss_f32(widened);
#elif defined(CPU_CAPABILITY_AVX512)
  // Lane 0 holds the input; the remaining fifteen 16-bit lanes are zero.
  const __m256i packed =
      _mm256_setr_epi16(val, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  const __m512 widened = _mm512_cvtph_ps(packed);
  return _mm512_cvtss_f32(widened);
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace CPU_CAPABILITY
|
||||
} // namespace vec
|
||||
} // namespace at
|
||||
|
|
@ -1569,29 +1569,6 @@ namespace {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Round-trips 100 sample values (i + 0.3 for i in [0, 100)) through the
// float->half and half->float scalar conversions and checks both directions
// against the c10::detail fp16_ieee_* reference routines. On AVX2/AVX512
// (non-Apple) builds the at::vec scalar helpers are the code under test;
// otherwise the reference routines are compared with themselves.
TEST(HalfConversionTest, HalfFloat) {
  constexpr int kNumSamples = 100;
  float inputs[kNumSamples];
  for (const auto idx : c10::irange(kNumSamples)) {
    inputs[idx] = idx + 0.3;
  }

  for (const auto idx : c10::irange(kNumSamples)) {
    const float src = inputs[idx];
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
    !defined(__APPLE__)
    const uint16_t bits = at::vec::float2half_scalar(src);
    const float back = at::vec::half2float_scalar(bits);
#else
    const uint16_t bits = c10::detail::fp16_ieee_from_fp32_value(src);
    const float back = c10::detail::fp16_ieee_to_fp32_value(bits);
#endif

    EXPECT_EQ(bits, c10::detail::fp16_ieee_from_fp32_value(src))
        << "Test failed for float to uint16 " << src << "\n";
    EXPECT_EQ(back, c10::detail::fp16_ieee_to_fp32_value(bits))
        << "Test failed for uint16 to float " << bits << "\n";
  }
}
|
||||
|
||||
#else
|
||||
#error GTEST does not have TYPED_TEST
|
||||
|
|
|
|||
|
|
@ -20,11 +20,6 @@
|
|||
#include <sycl/sycl.hpp> // for SYCL 2020
|
||||
#endif
|
||||
|
||||
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
#include <ATen/cpu/vec/vec_half.h>
|
||||
#endif
|
||||
|
||||
C10_CLANG_DIAGNOSTIC_PUSH()
|
||||
#if C10_CLANG_HAS_WARNING("-Wimplicit-int-float-conversion")
|
||||
C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion")
|
||||
|
|
@ -40,14 +35,9 @@ inline C10_HOST_DEVICE Half::Half(float value)
|
|||
x(__half_as_short(__float2half(value)))
|
||||
#elif defined(__SYCL_DEVICE_ONLY__)
|
||||
x(c10::bit_cast<uint16_t>(sycl::half(value)))
|
||||
#else
|
||||
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
x(at::vec::float2half_scalar(value))
|
||||
#else
|
||||
x(detail::fp16_ieee_from_fp32_value(value))
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -58,14 +48,9 @@ inline C10_HOST_DEVICE Half::operator float() const {
|
|||
return __half2float(*reinterpret_cast<const __half*>(&x));
|
||||
#elif defined(__SYCL_DEVICE_ONLY__)
|
||||
return float(c10::bit_cast<sycl::half>(x));
|
||||
#else
|
||||
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
return at::vec::half2float_scalar(x);
|
||||
#else
|
||||
return detail::fp16_ieee_to_fp32_value(x);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||||
|
|
|
|||
Loading…
Reference in a new issue