From de8cd6b20118f5c7d060c41e87bf6b90f7e20eed Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 22 Jan 2021 15:12:51 -0800 Subject: [PATCH] [BE] Replace M_PI with c10::pi constexpr variable (#50819) Summary: Also, get rid of MSVC specific `_USE_MATH_DEFINES` Test at compile time that c10::pi == M_PI Pull Request resolved: https://github.com/pytorch/pytorch/pull/50819 Reviewed By: albanD Differential Revision: D25976330 Pulled By: malfet fbshipit-source-id: 8f3ddfd58a5aa4bd382da64ad6ecc679706d1284 --- aten/src/ATen/CPUGeneratorImpl.cpp | 3 +- aten/src/ATen/core/DistributionsHelper.h | 11 ++----- aten/src/ATen/core/TransformationHelper.h | 3 +- aten/src/ATen/cpu/vec256/vec256_bfloat16.h | 2 +- .../ATen/cpu/vec256/vec256_complex_double.h | 3 +- .../ATen/cpu/vec256/vec256_complex_float.h | 3 +- aten/src/ATen/cpu/vec256/vec256_double.h | 2 +- aten/src/ATen/cpu/vec256/vec256_float.h | 2 +- aten/src/ATen/native/Distributions.h | 5 ++-- aten/src/ATen/native/Loss.cpp | 9 +----- aten/src/ATen/native/Math.h | 22 +++++++------- aten/src/ATen/native/SpectralOps.cpp | 8 ----- aten/src/ATen/native/TensorFactories.cpp | 13 ++------ aten/src/ATen/native/UnaryOps.cpp | 13 ++------ .../ATen/native/cpu/DistributionTemplates.h | 4 +-- aten/src/ATen/native/cpu/UnaryOpsKernel.cpp | 4 +-- aten/src/ATen/native/cpu/zmath.h | 3 +- aten/src/ATen/native/cuda/UnaryOpsKernel.cu | 4 ++- aten/src/TH/THGeneral.h.in | 4 --- c10/util/MathConstants.cpp | 12 ++++++++ c10/util/MathConstants.h | 30 +++++++++++++++++++ 21 files changed, 84 insertions(+), 76 deletions(-) create mode 100644 c10/util/MathConstants.cpp create mode 100644 c10/util/MathConstants.h diff --git a/aten/src/ATen/CPUGeneratorImpl.cpp b/aten/src/ATen/CPUGeneratorImpl.cpp index ff4a2f1c61e..aab4b4d702c 100644 --- a/aten/src/ATen/CPUGeneratorImpl.cpp +++ b/aten/src/ATen/CPUGeneratorImpl.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace at { @@ -153,7 +154,7 @@ void CPUGeneratorImpl::set_state(const c10::TensorImpl& new_state) { // intermediate values. if (legacy_pod->normal_is_valid) { auto r = legacy_pod->normal_rho; - auto theta = 2.0 * M_PI * legacy_pod->normal_x; + auto theta = 2.0 * c10::pi * legacy_pod->normal_x; // we return the sin version of the normal sample when in caching mode double_normal_sample = c10::optional(r * ::sin(theta)); } diff --git a/aten/src/ATen/core/DistributionsHelper.h b/aten/src/ATen/core/DistributionsHelper.h index d767a1ca523..6205fc4210f 100644 --- a/aten/src/ATen/core/DistributionsHelper.h +++ b/aten/src/ATen/core/DistributionsHelper.h @@ -1,17 +1,10 @@ #pragma once -// define constants like M_PI and C keywords for MSVC -#ifdef _MSC_VER -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#endif - #include #include #include #include +#include #include #include @@ -220,7 +213,7 @@ struct normal_distribution { const dist_acctype u1 = uniform(generator); const dist_acctype u2 = uniform(generator); const dist_acctype r = ::sqrt(static_cast(-2.0) * ::log(static_cast(1.0)-u2)); - const dist_acctype theta = static_cast(2.0) * static_cast(M_PI) * u1; + const dist_acctype theta = static_cast(2.0) * c10::pi * u1; if (std::is_same::value) { maybe_set_next_double_normal_sample(generator, r * ::sin(theta)); } else { diff --git a/aten/src/ATen/core/TransformationHelper.h b/aten/src/ATen/core/TransformationHelper.h index e8bafe3bcba..e367fd7daca 100644 --- a/aten/src/ATen/core/TransformationHelper.h +++ b/aten/src/ATen/core/TransformationHelper.h @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -101,7 +102,7 @@ C10_HOST_DEVICE inline T normal(T val, T mean, T std) { template C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) { // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function - return median + sigma * at::tan(static_cast(M_PI) * (val - static_cast(0.5))); + return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); } /** diff --git a/aten/src/ATen/cpu/vec256/vec256_bfloat16.h b/aten/src/ATen/cpu/vec256/vec256_bfloat16.h index dbe9cf374d9..d2fae23b971 100644 --- a/aten/src/ATen/cpu/vec256/vec256_bfloat16.h +++ b/aten/src/ATen/cpu/vec256/vec256_bfloat16.h @@ -210,7 +210,7 @@ public: const auto nan_vec = _mm256_set1_ps(NAN); const auto not_nan_mask = _mm256_cmp_ps(values, values, _CMP_EQ_OQ); const auto nan_mask = _mm256_cmp_ps(not_nan_mask, zero_vec, _CMP_EQ_OQ); - const auto pi = _mm256_set1_ps(M_PI); + const auto pi = _mm256_set1_ps(c10::pi); const auto neg_mask = _mm256_cmp_ps(values, zero_vec, _CMP_LT_OQ); auto angle = _mm256_blendv_ps(zero_vec, pi, neg_mask); diff --git a/aten/src/ATen/cpu/vec256/vec256_complex_double.h b/aten/src/ATen/cpu/vec256/vec256_complex_double.h index a9f9b6a776c..3779b2b0ad4 100644 --- a/aten/src/ATen/cpu/vec256/vec256_complex_double.h +++ b/aten/src/ATen/cpu/vec256/vec256_complex_double.h @@ -204,7 +204,8 @@ public: } Vec256> acos() const { // acos(x) = pi/2 - asin(x) - const __m256d pi_2 = _mm256_setr_pd(M_PI/2, 0.0, M_PI/2, 0.0); + constexpr auto pi_2d = c10::pi / 2; + const __m256d pi_2 = _mm256_setr_pd(pi_2d, 0.0, pi_2d, 0.0); return _mm256_sub_pd(pi_2, asin()); } Vec256> atan() const; diff --git a/aten/src/ATen/cpu/vec256/vec256_complex_float.h b/aten/src/ATen/cpu/vec256/vec256_complex_float.h index 0398567629c..2b76ba49046 100644 --- a/aten/src/ATen/cpu/vec256/vec256_complex_float.h +++ b/aten/src/ATen/cpu/vec256/vec256_complex_float.h @@ -242,7 +242,8 @@ public: } Vec256> acos() const { // acos(x) = pi/2 - asin(x) - const __m256 pi_2 = _mm256_setr_ps(M_PI/2, 0.0, M_PI/2, 0.0, M_PI/2, 0.0, M_PI/2, 0.0); + constexpr float pi_2f = c10::pi / 2; + const __m256 pi_2 = _mm256_setr_ps(pi_2f, 0.0, pi_2f, 0.0, pi_2f, 0.0, pi_2f, 0.0); return _mm256_sub_ps(pi_2, asin()); } Vec256> atan() const; diff --git a/aten/src/ATen/cpu/vec256/vec256_double.h b/aten/src/ATen/cpu/vec256/vec256_double.h index 0bea07dbf59..75a423d62fe 100644 --- a/aten/src/ATen/cpu/vec256/vec256_double.h +++ b/aten/src/ATen/cpu/vec256/vec256_double.h @@ -112,7 +112,7 @@ public: const auto nan_vec = _mm256_set1_pd(NAN); const auto not_nan_mask = _mm256_cmp_pd(values, values, _CMP_EQ_OQ); const auto nan_mask = _mm256_cmp_pd(not_nan_mask, zero_vec, _CMP_EQ_OQ); - const auto pi = _mm256_set1_pd(M_PI); + const auto pi = _mm256_set1_pd(c10::pi); const auto neg_mask = _mm256_cmp_pd(values, zero_vec, _CMP_LT_OQ); auto angle = _mm256_blendv_pd(zero_vec, pi, neg_mask); diff --git a/aten/src/ATen/cpu/vec256/vec256_float.h b/aten/src/ATen/cpu/vec256/vec256_float.h index a8fd65b0ba7..62786beef57 100644 --- a/aten/src/ATen/cpu/vec256/vec256_float.h +++ b/aten/src/ATen/cpu/vec256/vec256_float.h @@ -119,7 +119,7 @@ public: const auto nan_vec = _mm256_set1_ps(NAN); const auto not_nan_mask = _mm256_cmp_ps(values, values, _CMP_EQ_OQ); const auto nan_mask = _mm256_cmp_ps(not_nan_mask, zero_vec, _CMP_EQ_OQ); - const auto pi = _mm256_set1_ps(M_PI); + const auto pi = _mm256_set1_ps(c10::pi); const auto neg_mask = _mm256_cmp_ps(values, zero_vec, _CMP_LT_OQ); auto angle = _mm256_blendv_ps(zero_vec, pi, neg_mask); diff --git a/aten/src/ATen/native/Distributions.h b/aten/src/ATen/native/Distributions.h index 8b12ba6b287..2a733bd7a99 100644 --- a/aten/src/ATen/native/Distributions.h +++ b/aten/src/ATen/native/Distributions.h @@ -3,6 +3,7 @@ #include #include #include +#include // ROCM hcc doesn't work well with using std:: in kernel functions #if defined(__CUDA_ARCH__) @@ -276,8 +277,8 @@ C10_DEVICE static inline scalar_t digamma_one(scalar_t x) { } // it is more standard to write this as recursion, but // nvcc does not like that - additional_summand = -static_cast(M_PI) / - compat_tan(static_cast(M_PI) * x); + additional_summand = -c10::pi / + compat_tan(c10::pi * x); x = 1 - x; } diff --git a/aten/src/ATen/native/Loss.cpp b/aten/src/ATen/native/Loss.cpp index f5738dd83d0..44506e05858 100644 --- a/aten/src/ATen/native/Loss.cpp +++ b/aten/src/ATen/native/Loss.cpp @@ -1,10 +1,3 @@ -// define constants like M_PI and C keywords for MSVC -#ifdef _MSC_VER -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#endif #include #include #include @@ -247,7 +240,7 @@ Tensor poisson_nll_loss(const Tensor& input, const Tensor& target, const bool lo } if (full) { - auto stirling_term = target * at::log(target) - target + 0.5 * at::log(2 * M_PI * target); + auto stirling_term = target * at::log(target) - target + 0.5 * at::log(2 * c10::pi * target); loss += stirling_term.masked_fill(target <= 1, 0); } diff --git a/aten/src/ATen/native/Math.h b/aten/src/ATen/native/Math.h index 954ec001a7c..f85dc2320e7 100644 --- a/aten/src/ATen/native/Math.h +++ b/aten/src/ATen/native/Math.h @@ -6,11 +6,9 @@ #include #include #include +#include #include -#ifndef M_PIf -#define M_PIf 3.1415926535f -#endif // M_PIf /* The next function is taken from https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c. Below is the copyright. @@ -105,8 +103,8 @@ Date: February 1996 #endif } /* Two steps of Newton-Raphson correction */ - x = x - (std::erf(x) - y) / ((static_cast(2.0)/static_cast(std::sqrt(M_PI)))*std::exp(-x*x)); - x = x - (std::erf(x) - y) / ((static_cast(2.0)/static_cast(std::sqrt(M_PI)))*std::exp(-x*x)); + x = x - (std::erf(x) - y) / ((static_cast(2.0)/static_cast(std::sqrt(c10::pi)))*std::exp(-x*x)); + x = x - (std::erf(x) - y) / ((static_cast(2.0)/static_cast(std::sqrt(c10::pi)))*std::exp(-x*x)); return(x); } @@ -241,8 +239,8 @@ static inline double trigamma(double x) { double result = 0; if (x < 0.5) { sign = -1; - const double sin_pi_x = sin(M_PI * x); - result -= (M_PI * M_PI) / (sin_pi_x * sin_pi_x); + const double sin_pi_x = sin(c10::pi * x); + result -= (c10::pi * c10::pi) / (sin_pi_x * sin_pi_x); x = 1 - x; } for (int i = 0; i < 6; ++i) { @@ -259,8 +257,8 @@ static inline float trigamma(float x) { float result = 0; if (x < 0.5f) { sign = -1; - const float sin_pi_x = sinf(M_PIf * x); - result -= (M_PIf * M_PIf) / (sin_pi_x * sin_pi_x); + const float sin_pi_x = sinf(c10::pi * x); + result -= (c10::pi * c10::pi) / (sin_pi_x * sin_pi_x); x = 1 - x; } for (int i = 0; i < 6; ++i) { @@ -292,7 +290,7 @@ static inline double calc_digamma(double x) { // If the argument is a negative integer, NaN is returned return std::numeric_limits::quiet_NaN(); } - return calc_digamma(1 - x) - M_PI / tan(M_PI * x); + return calc_digamma(1 - x) - c10::pi / tan(c10::pi * x); } // Push x to be >= 10 @@ -346,7 +344,7 @@ static inline float calc_digamma(float x) { } // Avoid rounding errors for `tan`'s input. // Those make a big difference at extreme values. - float pi_over_tan_pi_x = (float)(M_PI / tan(M_PI * (double)x)); + float pi_over_tan_pi_x = (float)(c10::pi / tan(c10::pi * (double)x)); return calc_digamma(1 - x) - pi_over_tan_pi_x; } @@ -883,7 +881,7 @@ static scalar_t _igam_helper_asymptotic_series(scalar_t a, scalar_t x, bool igam absoldterm = absterm; afac /= a; } - res += sgn * std::exp(-0.5 * a * eta * eta) * sum / std::sqrt(2 * M_PIf * a); + res += sgn * std::exp(-0.5 * a * eta * eta) * sum / std::sqrt(2 * c10::pi * a); return res; } diff --git a/aten/src/ATen/native/SpectralOps.cpp b/aten/src/ATen/native/SpectralOps.cpp index 289d1128d2f..a49076ad02f 100644 --- a/aten/src/ATen/native/SpectralOps.cpp +++ b/aten/src/ATen/native/SpectralOps.cpp @@ -1,11 +1,3 @@ -// define constants like M_PI and C keywords for MSVC -#ifdef _MSC_VER -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#endif - #include #include #include diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp index 10c2d6ee999..0af777ed6b1 100644 --- a/aten/src/ATen/native/TensorFactories.cpp +++ b/aten/src/ATen/native/TensorFactories.cpp @@ -1,11 +1,3 @@ -// define constants like M_PI and C keywords for MSVC -#ifdef _MSC_VER -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#endif - #include #include #include @@ -14,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -915,7 +908,7 @@ Tensor blackman_window( window_length += 1; } // from https://en.wikipedia.org/wiki/Window_function#Blackman_window - auto window = native::arange(window_length, options).mul_(M_PI / static_cast(window_length - 1)); + auto window = native::arange(window_length, options).mul_(c10::pi / static_cast(window_length - 1)); window = window.mul(4).cos_().mul_(0.08) - window.mul(2).cos_().mul_(0.5) + 0.42; return periodic ? window.narrow(0, 0, window_length - 1) : window; } @@ -960,7 +953,7 @@ Tensor hamming_window( window_length += 1; } auto window = native::arange(window_length, options); - window.mul_(M_PI * 2. / static_cast(window_length - 1)).cos_().mul_(-beta).add_(alpha); + window.mul_(c10::pi * 2. / static_cast(window_length - 1)).cos_().mul_(-beta).add_(alpha); return periodic ? window.narrow(0, 0, window_length - 1) : window; } diff --git a/aten/src/ATen/native/UnaryOps.cpp b/aten/src/ATen/native/UnaryOps.cpp index e2d1de5c07b..ca311f86091 100644 --- a/aten/src/ATen/native/UnaryOps.cpp +++ b/aten/src/ATen/native/UnaryOps.cpp @@ -1,11 +1,3 @@ -// define constants like M_PI and C keywords for MSVC -#ifdef _MSC_VER -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#endif - #include #include #include @@ -16,6 +8,7 @@ #include #include +#include #include #include #include @@ -647,14 +640,14 @@ Tensor mvlgamma(const Tensor& self, int64_t p) { mvlgamma_check(self, p); Tensor args = native::arange(-p / 2. + 0.5, 0.5, 0.5, self.options()); args = args.add(self.unsqueeze(-1)); - return args.lgamma_().sum(-1).add_(p * (p - 1) * std::log(M_PI) / 4.); + return args.lgamma_().sum(-1).add_(p * (p - 1) * std::log(c10::pi) / 4.); } Tensor& mvlgamma_(Tensor& self, int64_t p) { mvlgamma_check(self, p); Tensor args = native::arange(-p / 2. + 0.5, 0.5, 0.5, self.options()); args = args.add(self.unsqueeze(-1)); - return self.copy_(args.lgamma_().sum(-1).add_(p * (p - 1) * std::log(M_PI) / 4.)); + return self.copy_(args.lgamma_().sum(-1).add_(p * (p - 1) * std::log(c10::pi) / 4.)); } // NB: If you use this macro, you may also need to add a CUDA forwarding diff --git a/aten/src/ATen/native/cpu/DistributionTemplates.h b/aten/src/ATen/native/cpu/DistributionTemplates.h index d4b6da57111..5f5d47e940b 100644 --- a/aten/src/ATen/native/cpu/DistributionTemplates.h +++ b/aten/src/ATen/native/cpu/DistributionTemplates.h @@ -112,7 +112,7 @@ void normal_fill_AVX2(Tensor& self, const float mean, const float std, RNG gener at::uniform_real_distribution uniform(0, 1); data[i] = uniform(generator); } - const __m256 two_pi = _mm256_set1_ps(2.0f * M_PI); + const __m256 two_pi = _mm256_set1_ps(2.0f * c10::pi); const __m256 one = _mm256_set1_ps(1.0f); const __m256 minus_two = _mm256_set1_ps(-2.0f); const __m256 mean_v = _mm256_set1_ps(mean); @@ -140,7 +140,7 @@ static void normal_fill_16(scalar_t *data, const scalar_t mean, const scalar_t s const scalar_t u1 = 1 - data[j]; // [0, 1) -> (0, 1] for log. const scalar_t u2 = data[j + 8]; const scalar_t radius = std::sqrt(-2 * std::log(u1)); - const scalar_t theta = 2.0f * M_PI * u2; + const scalar_t theta = 2.0f * c10::pi * u2; data[j] = radius * std::cos(theta) * std + mean; data[j + 8] = radius * std::sin(theta) * std + mean; } diff --git a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp b/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp index 6ed4b3af6f2..a79d437ec94 100644 --- a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp +++ b/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include #if AT_MKL_ENABLED() #include @@ -310,7 +310,7 @@ static void sinc_kernel(TensorIterator& iter) { if (a == scalar_t(0)) { return scalar_t(1); } else { - scalar_t product = scalar_t(M_PI) * a; + scalar_t product = c10::pi * a; return std::sin(product) / product; } }); diff --git a/aten/src/ATen/native/cpu/zmath.h b/aten/src/ATen/native/cpu/zmath.h index 7be18ef519b..0a5c8f2ab34 100644 --- a/aten/src/ATen/native/cpu/zmath.h +++ b/aten/src/ATen/native/cpu/zmath.h @@ -3,6 +3,7 @@ // Complex number math operations that act as no-ops for other dtypes. #include #include +#include #include namespace at { namespace native { @@ -44,7 +45,7 @@ inline VALUE_TYPE angle_impl (SCALAR_TYPE z) { if (at::_isnan(z)) { return z; } - return z < 0 ? M_PI : 0; + return z < 0 ? c10::pi : 0; } template<> diff --git a/aten/src/ATen/native/cuda/UnaryOpsKernel.cu b/aten/src/ATen/native/cuda/UnaryOpsKernel.cu index e727335aaf1..585ea9e0a26 100644 --- a/aten/src/ATen/native/cuda/UnaryOpsKernel.cu +++ b/aten/src/ATen/native/cuda/UnaryOpsKernel.cu @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -109,7 +110,8 @@ void sinc_kernel_cuda(TensorIterator& iter) { if (a == scalar_t(0)) { return scalar_t(1); } else { - scalar_t product = scalar_t(M_PI) * a; + // NVCC says constexpr var is not accessible from device + scalar_t product = c10::detail::pi() * a; return std::sin(product) / product; } }); diff --git a/aten/src/TH/THGeneral.h.in b/aten/src/TH/THGeneral.h.in index d9ee9590c8b..ba2d61f400c 100644 --- a/aten/src/TH/THGeneral.h.in +++ b/aten/src/TH/THGeneral.h.in @@ -69,10 +69,6 @@ # define TH_UNUSED #endif -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - typedef void (*THErrorHandlerFunction)(const char *msg, void *data); typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data); diff --git a/c10/util/MathConstants.cpp b/c10/util/MathConstants.cpp new file mode 100644 index 00000000000..78e04a88191 --- /dev/null +++ b/c10/util/MathConstants.cpp @@ -0,0 +1,12 @@ +// define constants like M_PI and C keywords for MSVC +#ifdef _MSC_VER +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES +#endif +#endif + +#include + +#include + +static_assert(M_PI == c10::pi, "c10::pi must be equal to M_PI"); diff --git a/c10/util/MathConstants.h b/c10/util/MathConstants.h new file mode 100644 index 00000000000..d90d5d7effe --- /dev/null +++ b/c10/util/MathConstants.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +namespace c10 { +// TODO: Replace me with inline constexpr variable when C++17 becomes available +namespace detail { +template +C10_HOST_DEVICE inline constexpr T pi() { + return static_cast(3.14159265358979323846L); +} + +template<> +C10_HOST_DEVICE inline constexpr BFloat16 pi() { + // According to https://en.wikipedia.org/wiki/Bfloat16_floating-point_format#Special_values pi is encoded as 4049 + return BFloat16(0x4049, BFloat16::from_bits()); +} +template<> +C10_HOST_DEVICE inline constexpr Half pi() { + return Half(0x4248, Half::from_bits()); +} +} // namespace detail + +// TODO: Replace me with std::numbers::pi when C++20 is there +template +constexpr T pi = c10::detail::pi(); + +} // namespace c10