[tests] fix static cache implementation is not compatible with attn_implementation==flash_attention_2 (#32039)

* add flash attention check

* fix

* fix
Fanli Lin 2024-07-26 17:41:27 +08:00 committed by GitHub
parent 5f841c74b6
commit 27c7f971c0
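
The "add flash attention check" item in the message is not shown in this excerpt; it generally means gating FA2-dependent tests on whether the flash-attn kernels are actually installed. A minimal sketch of such a gate, using availability helpers that transformers ships (`require_flash_attn`, `is_flash_attn_2_available`); the test class and method names here are hypothetical:

    import unittest

    from transformers.testing_utils import require_flash_attn, require_torch_gpu
    from transformers.utils import is_flash_attn_2_available


    class FlashAttentionGateExample(unittest.TestCase):  # hypothetical class, for illustration
        @require_flash_attn  # decorator form: skips when the flash-attn package is missing
        @require_torch_gpu
        def test_fa2_generation(self):
            # Imperative form: useful when only some parameterizations need the kernel.
            if not is_flash_attn_2_available():
                self.skipTest("flash-attn 2 is not installed")
            ...  # exercise the flash_attention_2 code path here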

@@ -290,7 +290,7 @@ class CacheIntegrationTest(unittest.TestCase):
         self.assertTrue(decoded[0].endswith(last_output))
 
     @require_torch_gpu
-    @parameterized.expand(["eager", "sdpa", "flash_attention_2"])
+    @parameterized.expand(["eager", "sdpa"])
     def test_static_cache_greedy_decoding_pad_left(self, attn_implementation):
         EXPECTED_GENERATION = [
             "The best color is the one that complements the skin tone of the",
@@ -330,7 +330,7 @@ class CacheIntegrationTest(unittest.TestCase):
         self.assertListEqual(decoded, EXPECTED_GENERATION)
 
     @require_torch_gpu
-    @parameterized.expand(["eager", "sdpa", "flash_attention_2"])
+    @parameterized.expand(["eager", "sdpa"])
     def test_static_cache_greedy_decoding_pad_right(self, attn_implementation):
         EXPECTED_GENERATION = [
             "The best color isЋ the one that complements the skin tone of",