From 8f7eb75c3ef659da5cc85100a31199c5c4e31808 Mon Sep 17 00:00:00 2001
From: Yufeng Li
Date: Thu, 12 Jan 2023 09:06:05 -0800
Subject: [PATCH] fix greedysearch token out of range bug (#14242)

Bug: the last sentence generates token out of vocabulary size.
Cause: total element should be computed with padded vocabulary size.
---
 .../contrib_ops/cuda/transformers/generation_cuda_impl.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
index bf98394d1c..600eb50648 100644
--- a/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
+++ b/onnxruntime/contrib_ops/cuda/transformers/generation_cuda_impl.cu
@@ -181,7 +181,7 @@ void LaunchLogitsProcessKernel(
     float repetition_penalty,
     int no_repeat_ngram_size,
     cudaStream_t stream) {
-  int total_elements = batch_size * num_beams * vocab_size;
+  int total_elements = batch_size * num_beams * padded_vocab_size;
   constexpr int blockSize = 256;
   const int gridSize = (total_elements + blockSize - 1) / blockSize;
   LogitsProcessKernel<T><<<gridSize, blockSize, 0, stream>>>(