From 0ace27fdf785ccb790ee407bd4dc3c218017bf78 Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Mon, 20 Mar 2023 15:09:49 -0700 Subject: [PATCH] Disable unit tests for decoder masked multihead attention on CC 5.2 or lower GPUs (#15114) --- .../decoder_masked_multihead_attention_op_test.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc index dbd05fe83d..11972e556e 100644 --- a/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/decoder_masked_multihead_attention_op_test.cc @@ -635,6 +635,11 @@ std::vector Softmax_QK_Transpose_V(MLFloat16* softmax_qk_transpose_ma return output; } TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) { + // The kernel is only supported on CC 5.3 or higher GPUs + if (NeedSkipIfCudaArchLowerThan(530)) { + return; + } + // Vary batch size for (int batch_size = 1; batch_size <= 5; batch_size += 2) { // Vary kv_lengths @@ -742,6 +747,11 @@ TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) { } TEST(DecoderMaskedMultiheadAttentionTest, Test_fp16) { + // The kernel is only supported on CC 5.3 or higher GPUs + if (NeedSkipIfCudaArchLowerThan(530)) { + return; + } + // Vary batch size for (int batch_size = 1; batch_size <= 5; batch_size += 2) { // Vary kv_lengths