From 0ace27fdf785ccb790ee407bd4dc3c218017bf78 Mon Sep 17 00:00:00 2001
From: Hariharan Seshadri <shariharan91@gmail.com>
Date: Mon, 20 Mar 2023 15:09:49 -0700
Subject: [PATCH] Disable unit tests for decoder masked multihead attention on
 CC 5.2 or lower GPUs (#15114)

---
 .../decoder_masked_multihead_attention_op_test.cc      | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc
index dbd05fe83d..11972e556e 100644
--- a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc
+++ b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc
@@ -635,6 +635,11 @@ std::vector<MLFloat16> Softmax_QK_Transpose_V(MLFloat16* softmax_qk_transpose_ma
   return output;
 }
 TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) {
+  // The kernel is only supported on GPUs with CUDA compute capability (CC) 5.3 or higher
+  if (NeedSkipIfCudaArchLowerThan(530)) {
+    return;
+  }
+
   // Vary batch size
   for (int batch_size = 1; batch_size <= 5; batch_size += 2) {
     // Vary kv_lengths
@@ -742,6 +747,11 @@ TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) {
 }
 
 TEST(DecoderMaskedMultiheadAttentionTest, Test_fp16) {
+  // The kernel is only supported on GPUs with CUDA compute capability (CC) 5.3 or higher
+  if (NeedSkipIfCudaArchLowerThan(530)) {
+    return;
+  }
+
   // Vary batch size
   for (int batch_size = 1; batch_size <= 5; batch_size += 2) {
     // Vary kv_lengths