mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-28 03:20:58 +00:00
Disable unit tests for decoder masked multihead attention on CC 5.2 or lower GPUs (#15114)
This commit is contained in:
parent
226a691e05
commit
0ace27fdf7
1 changed file with 10 additions and 0 deletions
|
|
@@ -635,6 +635,11 @@ std::vector<MLFloat16> Softmax_QK_Transpose_V(MLFloat16* softmax_qk_transpose_ma
|
|||
return output;
|
||||
}
|
||||
TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) {
|
||||
// The kernel is only supported on GPUs with CUDA compute capability (CC) 5.3 or higher
|
||||
if (NeedSkipIfCudaArchLowerThan(530)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Vary batch size
|
||||
for (int batch_size = 1; batch_size <= 5; batch_size += 2) {
|
||||
// Vary kv_lengths
|
||||
|
|
@@ -742,6 +747,11 @@ TEST(DecoderMaskedMultiheadAttentionTest, Test_fp32) {
|
|||
}
|
||||
|
||||
TEST(DecoderMaskedMultiheadAttentionTest, Test_fp16) {
|
||||
// The kernel is only supported on GPUs with CUDA compute capability (CC) 5.3 or higher
|
||||
if (NeedSkipIfCudaArchLowerThan(530)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Vary batch size
|
||||
for (int batch_size = 1; batch_size <= 5; batch_size += 2) {
|
||||
// Vary kv_lengths
|
||||
|
|
|
|||
Loading…
Reference in a new issue