onnxruntime/cmake/patches/cutlass/cutlass.patch

diff --git a/include/cute/numeric/complex.hpp b/include/cute/numeric/complex.hpp
index 3790ebd3..cf727d09 100644
--- a/include/cute/numeric/complex.hpp
+++ b/include/cute/numeric/complex.hpp
@@ -41,10 +41,14 @@
 // With CUDA 11.4, builds show spurious "-Wconversion" warnings
 // on line 656 of thrust/detail/type_traits.h.
 // These pragmas suppress the warnings.
+#ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wconversion"
+#endif
 #include <thrust/complex.h>
+#ifdef __GNUC__
 #pragma GCC diagnostic pop
+#endif

 #include <cute/config.hpp>

diff --git a/include/cutlass/functional.h b/include/cutlass/functional.h
index 59aec46a..8f2a913a 100644
--- a/include/cutlass/functional.h
+++ b/include/cutlass/functional.h
@@ -89,7 +89,7 @@ struct multiplies {
   }
 };

-#if defined(__CUDA_ARCH__)
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
 /// Partial specializations needed when __CUDA_NO_HALF2_OPERATORS__ is set
 template<>
 struct plus<__half2> {
@@ -143,12 +143,12 @@ struct multiplies<__half> {


 // Maximum with nan propogation
-// To propgate the NANs, the "max" of a two element that contains NaNs should also return a NaN
+// To propagate NaNs, the "max" of two elements that contain a NaN should also return a NaN
 template <typename T>
 struct maximum_with_nan_propogation {
   CUTLASS_HOST_DEVICE
   T operator()(T const &lhs, T const &rhs) const {
-    return lhs > rhs or std::isnan(lhs) ? lhs : rhs;
+    return lhs > rhs or isnan(lhs) ? lhs : rhs;
   }
 };

@@ -160,7 +160,7 @@ struct maximum_with_nan_propogation<float> {
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
     asm volatile("max.NaN.f32 %0, %1, %2;\n" : "=f"(res) : "f"(lhs), "f"(rhs));
 #else
-    res = lhs > rhs or std::isnan(lhs) ? lhs : rhs;
+    res = lhs > rhs or isnan(lhs) ? lhs : rhs;
 #endif
     return res;
   }
@@ -233,7 +233,7 @@ struct negate {
   }
 };

-/// Greater equal
+/// Greater equal
 template <typename T>
 struct greater_equal {
   CUTLASS_HOST_DEVICE
@@ -242,7 +242,7 @@ struct greater_equal {
   }
 };

-/// Greater
+/// Greater
 template <typename T>
 struct greater {
   CUTLASS_HOST_DEVICE
@@ -251,7 +251,7 @@ struct greater {
   }
 };

-/// Less equal
+/// Less equal
 template <typename T>
 struct less_equal {
   CUTLASS_HOST_DEVICE
@@ -260,7 +260,7 @@ struct less_equal {
   }
 };

-/// Less
+/// Less
 template <typename T>
 struct less {
   CUTLASS_HOST_DEVICE