Disable TF32 in tests of CUDA ep (#19963)

Operator or model test result shall not depend on whether
NVIDIA_TF32_OVERRIDE environment variable is set or not. This make test results more deterministic.
This commit is contained in:
Tianlei Wu 2024-03-18 11:17:34 -07:00 committed by GitHub
parent a033df8c31
commit 141966bb69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 68 additions and 47 deletions

View file

@ -2013,13 +2013,6 @@ TEST(AttentionTest, AttentionMaskIndexOutOfRange) {
#if !defined(__wasm__)
// TODO: fix in web assembly
TEST(AttentionTest, AttentionPastState_dynamic) {
// ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test.
// Do not run this test unless TF32 is disabled explicitly.
if (HasCudaEnvironment(800) && ParseEnvironmentVariableWithDefault<int>("NVIDIA_TF32_OVERRIDE", 1) != 0) {
GTEST_SKIP() << "Skipping AttentionPastState_dynamic in A100 since TF32 is enabled";
return;
}
// create rand inputs
RandomValueGenerator random{};
@ -2101,13 +2094,6 @@ static void RunModelWithRandomInput(
std::vector<int32_t>& mask_index_data,
std::string& onnx_model,
bool is_float16) {
// ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test.
// Do not run this test unless TF32 is disabled explicitly.
if (HasCudaEnvironment(800) && ParseEnvironmentVariableWithDefault<int>("NVIDIA_TF32_OVERRIDE", 1) != 0) {
GTEST_SKIP() << "Skipping RunModelWithRandomInput in A100 since TF32 is enabled";
return;
}
RandomValueGenerator random{234};
constexpr int hidden_size = 768;

View file

@ -8,6 +8,10 @@
#include "core/session/onnxruntime_cxx_api.h"
#include "test/common/cuda_op_test_utils.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_options.h"
#endif
extern std::unique_ptr<Ort::Env> ort_env;
namespace onnxruntime {
@ -70,7 +74,9 @@ TEST(BeamSearchTest, GptBeamSearchFp32) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
#endif
#ifdef USE_ROCM
@ -161,7 +167,9 @@ TEST(BeamSearchTest, GptBeamSearchFp16) {
if (enable_cuda || enable_rocm) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
#endif
#ifdef USE_ROCM
@ -254,7 +262,9 @@ TEST(BeamSearchTest, GptBeamSearchWithInitDecoderFp16) {
if (enable_cuda || enable_rocm) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
#endif
#ifdef USE_ROCM
@ -346,7 +356,9 @@ TEST(BeamSearchTest, GptBeamSearchFp16_VocabPadded) {
if (enable_cuda || enable_rocm) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
#endif
#ifdef USE_ROCM

View file

@ -8,6 +8,10 @@
#include "core/session/onnxruntime_cxx_api.h"
#include "test/common/cuda_op_test_utils.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_options.h"
#endif
extern std::unique_ptr<Ort::Env> ort_env;
namespace onnxruntime {
@ -64,9 +68,13 @@ TEST(GreedySearchTest, GptGreedySearchFp16_VocabPadded) {
if (is_cuda || is_rocm) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
if (is_cuda) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
}
#endif
if (is_rocm) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0));
}
@ -145,9 +153,13 @@ TEST(GreedySearchTest, GptGreedySearchFp32) {
if (is_cuda || is_rocm) {
Ort::SessionOptions session_options;
#ifdef USE_CUDA
if (is_cuda) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
}
#endif
if (is_rocm) {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(session_options, 0));
}

View file

@ -433,8 +433,7 @@ static void RunModelWithRandomInput(
std::vector<int64_t> token_offset_dims{batch_size, sequence_length};
std::vector<int64_t> cum_seq_len_dims{batch_size + 1};
// TF32 in SM >= 80 is enabled by default, need larger threshold for float when TF32 is enabled.
float gpu_threshold = is_float16 ? 0.15f : (HasCudaEnvironment(800) ? 0.05f : 0.005f);
float gpu_threshold = is_float16 ? 0.15f : 0.005f;
gpu_threshold *= sequence_length > 1024 ? 4.0f : 1.0f; // threshold should increase with sequence length
bool enable_cuda = HasCudaEnvironment(is_float16 ? 530 : 0);
if (enable_cuda) {

View file

@ -8,6 +8,10 @@
#include "core/session/onnxruntime_cxx_api.h"
#include "test/common/cuda_op_test_utils.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_options.h"
#endif
extern std::unique_ptr<Ort::Env> ort_env;
namespace onnxruntime {
@ -65,7 +69,10 @@ TEST(SamplingTest, Gpt2Sampling_GPU) {
LOGS_DEFAULT(WARNING) << "Hardware NOT support current architecture";
return;
}
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
session_options.AppendExecutionProvider_CUDA_V2(cuda_options);
#else // USE_ROCM
OrtROCMProviderOptions rocm_options;
// TODO - verify the default settings

View file

@ -25,6 +25,10 @@
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "nlohmann/json.hpp"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_options.h"
#endif
using namespace onnxruntime;
namespace {
@ -401,12 +405,15 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
if (enable_tensorrt) {
#ifdef USE_TENSORRT
OrtCUDAProviderOptions cuda_options;
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id));
#ifdef USE_CUDA
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.device_id = device_id;
cuda_options.do_copy_in_default_stream = true;
cuda_options.use_tf32 = false;
// TODO: Support arena configuration for users of test runner
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id));
sf.AppendExecutionProvider_CUDA(cuda_options);
sf.AppendExecutionProvider_CUDA_V2(cuda_options);
#endif
#else
fprintf(stderr, "TensorRT is not supported in this build");
return -1;
@ -424,10 +431,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
}
if (enable_cuda) {
#ifdef USE_CUDA
OrtCUDAProviderOptions cuda_options;
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.do_copy_in_default_stream = true;
cuda_options.use_tf32 = false;
// TODO: Support arena configuration for users of test runner
sf.AppendExecutionProvider_CUDA(cuda_options);
sf.AppendExecutionProvider_CUDA_V2(cuda_options);
#else
fprintf(stderr, "CUDA is not supported in this build");
return -1;

View file

@ -98,21 +98,6 @@ TEST_P(ModelTest, Run) {
std::unique_ptr<OnnxModelInfo> model_info = std::make_unique<OnnxModelInfo>(model_path.c_str());
#if defined(__linux__)
// ORT enables TF32 in GEMM for A100. TF32 will cause precsion loss and fail this test.
if (HasCudaEnvironment(800) && provider_name == "cuda") {
per_sample_tolerance = 1e-1;
if (model_path.find(ORT_TSTR("SSD")) > 0 ||
model_path.find(ORT_TSTR("ssd")) > 0 ||
model_path.find(ORT_TSTR("yolov3")) > 0 ||
model_path.find(ORT_TSTR("mask_rcnn")) > 0 ||
model_path.find(ORT_TSTR("FNS")) > 0) {
SkipTest("Skipping SSD test for big tolearance failure or other errors");
return;
}
}
#endif
if (model_info->HasDomain(ONNX_NAMESPACE::AI_ONNX_TRAINING_DOMAIN) ||
model_info->HasDomain(ONNX_NAMESPACE::AI_ONNX_PREVIEW_TRAINING_DOMAIN)) {
SkipTest("it has the training domain. No pipeline should need to run these tests.");
@ -192,12 +177,14 @@ TEST_P(ModelTest, Run) {
ASSERT_ORT_STATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options));
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(&OrtApis::ReleaseCUDAProviderOptions)> rel_cuda_options(
cuda_options, &OrtApis::ReleaseCUDAProviderOptions);
std::vector<const char*> keys{"device_id"};
std::vector<const char*> keys{"device_id", "use_tf32"};
std::vector<const char*> values;
std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID");
values.push_back(device_id.empty() ? "0" : device_id.c_str());
ASSERT_ORT_STATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 1));
values.push_back("0");
ASSERT_ORT_STATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 2));
ortso.AppendExecutionProvider_CUDA_V2(*cuda_options);
} else if (provider_name == "rocm") {
OrtROCMProviderOptions ep_options;
@ -229,6 +216,14 @@ TEST_P(ModelTest, Run) {
ASSERT_ORT_STATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options));
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(&OrtApis::ReleaseCUDAProviderOptions)> rel_cuda_options(
cuda_options, &OrtApis::ReleaseCUDAProviderOptions);
std::vector<const char*> keys{"device_id", "use_tf32"};
std::vector<const char*> values;
std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID");
values.push_back(device_id.empty() ? "0" : device_id.c_str());
values.push_back("0");
ASSERT_ORT_STATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 2));
ortso.AppendExecutionProvider_CUDA_V2(*cuda_options);
} else if (provider_name == "migraphx") {
OrtMIGraphXProviderOptions ep_options;

View file

@ -8,7 +8,7 @@
#ifdef USE_COREML
#include "core/providers/coreml/coreml_provider_factory.h"
#endif
#if defined(ENABLE_CUDA_NHWC_OPS)
#ifdef USE_CUDA
#include <core/providers/cuda/cuda_provider_options.h>
#endif
#include "core/session/onnxruntime_cxx_api.h"
@ -113,8 +113,9 @@ std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
std::unique_ptr<IExecutionProvider> DefaultCudaExecutionProvider() {
#ifdef USE_CUDA
OrtCUDAProviderOptions provider_options{};
OrtCUDAProviderOptionsV2 provider_options{};
provider_options.do_copy_in_default_stream = true;
provider_options.use_tf32 = false;
if (auto factory = CudaProviderFactoryCreator::Create(&provider_options))
return factory->CreateProvider();
#endif
@ -126,6 +127,7 @@ std::unique_ptr<IExecutionProvider> DefaultCudaNHWCExecutionProvider() {
#if defined(USE_CUDA)
OrtCUDAProviderOptionsV2 provider_options{};
provider_options.do_copy_in_default_stream = true;
provider_options.use_tf32 = false;
provider_options.prefer_nhwc = true;
if (auto factory = CudaProviderFactoryCreator::Create(&provider_options))
return factory->CreateProvider();