mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-24 22:17:32 +00:00
Transformer model CUDA EP align with CPU on corner case (#9889)
* Align with CPU on no input data * Address review comments and add tests Co-authored-by: Ubuntu <wy@linux-v100.aidmrjtolptuzevavgwhrapqcd.jx.internal.cloudapp.net>
This commit is contained in:
parent
63198a6566
commit
bb09acffed
6 changed files with 59 additions and 15 deletions
|
|
@ -44,6 +44,9 @@ Status FastGelu<T>::ComputeInternal(OpKernelContext* context) const {
|
|||
Tensor* output = context->Output(0, input->Shape());
|
||||
|
||||
int64_t input_length = input->Shape().Size();
|
||||
if (input_length == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
int64_t bias_length = (nullptr == bias) ? 0 : bias->Shape().Size();
|
||||
typedef typename ToCudaType<T>::MappedType CudaT;
|
||||
|
||||
|
|
|
|||
|
|
@ -41,12 +41,13 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
|
|||
|
||||
Tensor* output = ctx->Output(0, input->Shape());
|
||||
|
||||
if (input->SizeInBytes() == 0) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Inputs 'input' has no data from upstream nodes");
|
||||
if (input->Shape() != skip->Shape()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
"skip is expected to have same shape as input");
|
||||
}
|
||||
|
||||
if (skip->SizeInBytes() == 0) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Inputs 'skip' has no data from upstream nodes");
|
||||
if (input->Shape().Size() == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
const auto& input_dims = input->Shape().GetDims();
|
||||
|
|
@ -55,11 +56,6 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
|
|||
"input is expected to have 3 dimensions, got ", input_dims.size());
|
||||
}
|
||||
|
||||
if (input->Shape() != skip->Shape()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
"skip is expected to have same shape as input");
|
||||
}
|
||||
|
||||
const auto& gamma_dims = gamma->Shape().GetDims();
|
||||
if (gamma_dims.size() != 1) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
|
|
|
|||
|
|
@ -59,12 +59,6 @@ Status LayerNorm<T, U, simplified>::ComputeInternal(OpKernelContext* ctx) const
|
|||
auto bias_data = (simplified || (nullptr == bias)) ? nullptr : reinterpret_cast<const CudaT*>(bias->template Data<T>());
|
||||
|
||||
const TensorShape& x_shape = X->Shape();
|
||||
// Sometimes, due to a conversion issue, the input 'X' has no data, which is a case that the CUDA kernel cannot handle.
|
||||
// Provide more error information here instead of CUDA errors.
|
||||
if (X->SizeInBytes() == 0) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Inputs 'X' has no data from upstream nodes");
|
||||
}
|
||||
|
||||
const int64_t axis = HandleNegativeAxis(axis_, x_shape.NumDimensions());
|
||||
|
||||
int n1 = gsl::narrow<int>(x_shape.SizeToDimension(axis));
|
||||
|
|
@ -101,6 +95,10 @@ Status LayerNorm<T, U, simplified>::ComputeInternal(OpKernelContext* ctx) const
|
|||
inv_var_data = reinterpret_cast<CudaU*>(var->template MutableData<U>());
|
||||
}
|
||||
|
||||
if (x_shape.Size() == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
HostApplyLayerNorm<CudaT, CudaU, simplified>(GetDeviceProp(), Stream(), Y_data, mean_data, inv_var_data, X_data, n1, n2, epsilon_, scale_data, bias_data);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,6 +110,19 @@ static void RunFastGeluTest(
|
|||
RunFastGeluTest(input_data, bias_data, output_data, input_dims, bias_dims, output_dims, has_bias);
|
||||
}
|
||||
|
||||
// Runs RunFastGeluTest with an empty input vector (sequence_length is 0) and a
// four-element bias, covering the case where the operator receives no input data.
TEST(FastGeluTest, FastGeluWithNullInput) {
|
||||
int batch_size = 1;
|
||||
int sequence_length = 0;
|
||||
int hidden_size = 4;
|
||||
|
||||
std::vector<float> input_data = {};
|
||||
|
||||
std::vector<float> bias_data = {
|
||||
-0.5f, 0.6f, 1.2f, 2.1f};
|
||||
|
||||
RunFastGeluTest(input_data, bias_data, batch_size, sequence_length, hidden_size);
|
||||
}
|
||||
|
||||
TEST(FastGeluTest, FastGeluWithBiasFloat32) {
|
||||
int batch_size = 1;
|
||||
int sequence_length = 2;
|
||||
|
|
|
|||
|
|
@ -80,6 +80,11 @@ static void TestLayerNorm(const std::vector<int64_t>& x_dims,
|
|||
#endif
|
||||
}
|
||||
|
||||
// Runs TestLayerNorm on the shape {0, 20, 128}; the leading dimension of 0 means
// the input tensor contains no elements.
TEST(CudaKernelTest, LayerNorm_NullInput) {
|
||||
const std::vector<int64_t> X_dims{0, 20, 128};
|
||||
TestLayerNorm(X_dims, LAYER_NORM_OP, k_epsilon_default);
|
||||
}
|
||||
|
||||
TEST(CudaKernelTest, LayerNorm_SmallSizeTensor) {
|
||||
const std::vector<int64_t> X_dims{4, 20, 128};
|
||||
TestLayerNorm(X_dims, LAYER_NORM_OP, k_epsilon_default);
|
||||
|
|
|
|||
|
|
@ -83,6 +83,35 @@ static void RunTest(
|
|||
}
|
||||
}
|
||||
|
||||
// Runs RunTest with empty input, skip and expected-output vectors
// (sequence_length is 0) while gamma and beta each hold four values.
// NOTE(review): the empty std::vector<float>() passed as the fifth argument is
// presumably the optional bias; confirm against RunTest's signature.
TEST(SkipLayerNormTest, SkipLayerNormNullInput) {
|
||||
int batch_size = 1;
|
||||
int sequence_length = 0;
|
||||
int hidden_size = 4;
|
||||
|
||||
std::vector<float> input_data = {};
|
||||
|
||||
std::vector<float> skip_data = {};
|
||||
|
||||
std::vector<float> gamma_data = {
|
||||
0.3f, 0.2f, 4.0f, 2.2f};
|
||||
|
||||
std::vector<float> beta_data = {
|
||||
0.2f, 0.1f, 0.4f, 1.6f};
|
||||
|
||||
std::vector<float> output_data = {};
|
||||
|
||||
RunTest(input_data,
|
||||
skip_data,
|
||||
gamma_data,
|
||||
beta_data,
|
||||
std::vector<float>(),
|
||||
output_data,
|
||||
epsilon_,
|
||||
batch_size,
|
||||
sequence_length,
|
||||
hidden_size);
|
||||
}
|
||||
|
||||
TEST(SkipLayerNormTest, SkipLayerNormBatch1) {
|
||||
int batch_size = 1;
|
||||
int sequence_length = 2;
|
||||
|
|
|
|||
Loading…
Reference in a new issue