enable more unit tests for ROCM EP (#7222)

This commit is contained in:
Weixing Zhang 2021-04-02 15:57:08 -07:00 committed by GitHub
parent afbbeaa30a
commit ef88dc912c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 39 additions and 20 deletions

View file

@ -636,7 +636,7 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
endif()
if (onnxruntime_USE_ROCM)
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/include/hiprand ${onnxruntime_ROCM_HOME}/include/rocrand)
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
endif()
set(test_data_target onnxruntime_test_all)

View file

@ -25,8 +25,8 @@ using namespace onnxruntime::test;
enum TrainingMode { TrainingFalse, TrainingTrue, NoTraining };
// BiasDropout kernel is only implemented for CUDA
#ifdef USE_CUDA
// BiasDropout kernel is only implemented for CUDA/ROCM
#if defined(USE_CUDA) || defined(USE_ROCM)
namespace {
void RunBiasDropoutTest(const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
TrainingMode training_mode = TrainingTrue, bool use_float16_ratio = false, bool has_residual = true) {

View file

@ -777,7 +777,7 @@ TEST(MathOpTest, Pow_double_int64) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(MathOpTest, Pow_float16_float16) {
OpTester test("Pow", 12);
std::vector<int64_t> dims{4};
@ -787,7 +787,11 @@ TEST(MathOpTest, Pow_float16_float16) {
test.AddOutput<MLFloat16>("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f}));
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
#ifdef USE_CUDA
execution_providers.push_back(DefaultCudaExecutionProvider());
#elif USE_ROCM
execution_providers.push_back(DefaultRocmExecutionProvider());
#endif
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
@ -800,7 +804,11 @@ TEST(MathOpTest, Pow_float_float16) {
test.AddOutput<MLFloat16>("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f}));
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
#ifdef USE_CUDA
execution_providers.push_back(DefaultCudaExecutionProvider());
#elif USE_ROCM
execution_providers.push_back(DefaultRocmExecutionProvider());
#endif
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
#endif

View file

@ -43,13 +43,15 @@ TEST(GemmOpTest, GemmNoTransBIsInitializer) {
}
// Only the CUDA and ROCm kernels have float16 support
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(GemmOpTest, GemmNoTrans_f16) {
#ifdef USE_CUDA
int min_cuda_architecture = 530;
if (!HasCudaEnvironment(min_cuda_architecture)) {
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
return;
}
#endif
OpTester test("Gemm");
test.AddAttribute("transA", (int64_t)0);

View file

@ -1311,7 +1311,7 @@ TEST(ReductionOpTest, ReduceSum_int32) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceSumHalfHalf) {
OpTester test("ReduceSum");
test.AddAttribute("keepdims", (int64_t)0);
@ -1465,7 +1465,7 @@ TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_128) {
}
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_30528) {
test_apex_reduce_sum(4 * 128, 30528);
test_apex_reduce_sum(4 * 512, 30528);

View file

@ -82,7 +82,7 @@ TEST(GatherOpTest, Gather_invalid_index_cpu) {
{kCudaExecutionProvider, kOpenVINOExecutionProvider, kDnnlExecutionProvider, kNupharExecutionProvider, kTensorrtExecutionProvider});
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(GatherOpTest, Gather_invalid_index_gpu) {
OpTester test("Gather");
// Invalid index 3. data[3] does not exist.

View file

@ -2082,7 +2082,7 @@ TEST(GradientUtilsTest, InPlaceAccumulatorFloat32) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(GradientUtilsTest, InPlaceAccumulatorFloat16) {
OpTester test("InPlaceAccumulator", 1, onnxruntime::kMSDomain);
@ -2113,7 +2113,7 @@ TEST(GradientUtilsTest, ZeroGradientFloat32) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(GradientUtilsTest, ZeroGradientFloat16) {
OpTester test("ZeroGradient", 1, onnxruntime::kMSDomain);

View file

@ -15,9 +15,13 @@
#include "orttraining/training_ops/cpu/controlflow/event_pool.h" // TODO: move with PipelineBatchPlanner
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
#include "bert_toy_fetches.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_execution_provider.h"
#elif USE_ROCM
#include "core/providers/rocm/rocm_execution_provider.h"
#endif
#endif
using namespace onnxruntime::logging;
@ -299,7 +303,7 @@ TEST(GradientGraphBuilderTest, TrainingSession_WithProfiler) {
ASSERT_TRUE(count > 1);
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
static void RunBertTrainingWithChecks(
const SessionOptions& so,
const PathString& backprop_model_file) {
@ -316,9 +320,13 @@ static void RunBertTrainingWithChecks(
auto model_metadata = res.second;
std::cout << "Loaded " << model_metadata->graph_name << '\n';
#ifdef USE_CUDA
CUDAExecutionProviderInfo xp_info;
ASSERT_STATUS_OK(training_session->RegisterExecutionProvider(onnxruntime::make_unique<CUDAExecutionProvider>(xp_info)));
#elif USE_ROCM
ROCMExecutionProviderInfo xp_info;
ASSERT_STATUS_OK(training_session->RegisterExecutionProvider(onnxruntime::make_unique<ROCMExecutionProvider>(xp_info)));
#endif
ASSERT_STATUS_OK(training_session->Initialize());
RunOptions run_options;
@ -494,7 +502,7 @@ TEST(GradientGraphBuilderTest, TrainingSession_BertToy) {
PathString backprop_model_file;
ASSERT_STATUS_OK(BuildBackPropGraph(model_path, config, backprop_model_file));
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
SessionOptions so;
RunBertTrainingWithChecks(so, backprop_model_file);
#endif

View file

@ -10,7 +10,7 @@
namespace onnxruntime {
namespace test {
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(IsFiniteTest, Float) {
OpTester test("IsFinite", 1, kMSDomain);

View file

@ -14,7 +14,7 @@
namespace onnxruntime {
namespace test {
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
void test_all_1d_true(size_t size) {
std::unique_ptr<bool[]> p_data(new bool[size]);
@ -103,7 +103,7 @@ TEST_P(ReductionOpTest, ReduceAllL2) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST_P(ReductionOpTest, ReduceAllL2HalfHalf) {
OpTester test("ReduceAllL2", 1, onnxruntime::kMSDomain, true);
test.SetDeterminism(GetParam());
@ -345,7 +345,7 @@ TEST(ReductionOpTest, ReduceSumTraining_neg_axis) {
test.Run();
}
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
TEST(ReductionOpTest, ReduceSumTrainingHalfHalf) {
OpTester test("ReduceSumTraining", 1, onnxruntime::kMSDomain);
test.AddAttribute("keepdims", (int64_t)0);

View file

@ -97,13 +97,14 @@ void RunGatherGradTestWithRandomData(
}
} // namespace
#ifdef USE_CUDA
#if defined(USE_CUDA) || defined(USE_ROCM)
// TODO: This currently cannot pass CI due to a GPU architecture problem
TEST(GatherOpTest, Gather_axis0_indices2d_half) {
#ifdef USE_CUDA
if (NeedSkipIfCudaArchLowerThan(700)) {
return;
}
#endif
OpTester test("Gather");
test.AddAttribute<int64_t>("axis", 0LL);
test.AddInput<MLFloat16>("data", {3, 3},