mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
[tests] fix static cache implementation is not compatible with attn_implementation==flash_attention_2 (#32039)
* add flash attention check * fix * fix
This commit is contained in:
parent
5f841c74b6
commit
27c7f971c0
1 changed file with 2 additions and 2 deletions
|
|
@ -290,7 +290,7 @@ class CacheIntegrationTest(unittest.TestCase):
|
|||
self.assertTrue(decoded[0].endswith(last_output))
|
||||
|
||||
@require_torch_gpu
|
||||
@parameterized.expand(["eager", "sdpa", "flash_attention_2"])
|
||||
@parameterized.expand(["eager", "sdpa"])
|
||||
def test_static_cache_greedy_decoding_pad_left(self, attn_implementation):
|
||||
EXPECTED_GENERATION = [
|
||||
"The best color is the one that complements the skin tone of the",
|
||||
|
|
@ -330,7 +330,7 @@ class CacheIntegrationTest(unittest.TestCase):
|
|||
self.assertListEqual(decoded, EXPECTED_GENERATION)
|
||||
|
||||
@require_torch_gpu
|
||||
@parameterized.expand(["eager", "sdpa", "flash_attention_2"])
|
||||
@parameterized.expand(["eager", "sdpa"])
|
||||
def test_static_cache_greedy_decoding_pad_right(self, attn_implementation):
|
||||
EXPECTED_GENERATION = [
|
||||
"The best color isЋ the one that complements the skin tone of",
|
||||
|
|
|
|||
Loading…
Reference in a new issue