mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-05 04:17:53 +00:00
enable more unit tests for ROCM EP (#7222)
This commit is contained in:
parent
afbbeaa30a
commit
ef88dc912c
11 changed files with 39 additions and 20 deletions
|
|
@ -636,7 +636,7 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
|
|||
endif()
|
||||
|
||||
if (onnxruntime_USE_ROCM)
|
||||
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/include/hiprand ${onnxruntime_ROCM_HOME}/include/rocrand)
|
||||
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
|
||||
endif()
|
||||
|
||||
set(test_data_target onnxruntime_test_all)
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ using namespace onnxruntime::test;
|
|||
|
||||
enum TrainingMode { TrainingFalse, TrainingTrue, NoTraining };
|
||||
|
||||
// BiasDropout kernel is only implemented for CUDA
|
||||
#ifdef USE_CUDA
|
||||
// BiasDropout kernel is only implemented for CUDA/ROCM
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
namespace {
|
||||
void RunBiasDropoutTest(const bool use_mask, const std::vector<int64_t>& input_shape, float ratio = -1.0f,
|
||||
TrainingMode training_mode = TrainingTrue, bool use_float16_ratio = false, bool has_residual = true) {
|
||||
|
|
|
|||
|
|
@ -777,7 +777,7 @@ TEST(MathOpTest, Pow_double_int64) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(MathOpTest, Pow_float16_float16) {
|
||||
OpTester test("Pow", 12);
|
||||
std::vector<int64_t> dims{4};
|
||||
|
|
@ -787,7 +787,11 @@ TEST(MathOpTest, Pow_float16_float16) {
|
|||
test.AddOutput<MLFloat16>("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f}));
|
||||
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
|
||||
#ifdef USE_CUDA
|
||||
execution_providers.push_back(DefaultCudaExecutionProvider());
|
||||
#elif USE_ROCM
|
||||
execution_providers.push_back(DefaultRocmExecutionProvider());
|
||||
#endif
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
|
||||
}
|
||||
|
||||
|
|
@ -800,7 +804,11 @@ TEST(MathOpTest, Pow_float_float16) {
|
|||
test.AddOutput<MLFloat16>("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f}));
|
||||
|
||||
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
|
||||
#ifdef USE_CUDA
|
||||
execution_providers.push_back(DefaultCudaExecutionProvider());
|
||||
#elif USE_ROCM
|
||||
execution_providers.push_back(DefaultRocmExecutionProvider());
|
||||
#endif
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -43,13 +43,15 @@ TEST(GemmOpTest, GemmNoTransBIsInitializer) {
|
|||
}
|
||||
|
||||
// Only CUDA/ROCM kernels have float16 support
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(GemmOpTest, GemmNoTrans_f16) {
|
||||
#ifdef USE_CUDA
|
||||
int min_cuda_architecture = 530;
|
||||
if (!HasCudaEnvironment(min_cuda_architecture)) {
|
||||
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
OpTester test("Gemm");
|
||||
|
||||
test.AddAttribute("transA", (int64_t)0);
|
||||
|
|
|
|||
|
|
@ -1311,7 +1311,7 @@ TEST(ReductionOpTest, ReduceSum_int32) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceSumHalfHalf) {
|
||||
OpTester test("ReduceSum");
|
||||
test.AddAttribute("keepdims", (int64_t)0);
|
||||
|
|
@ -1465,7 +1465,7 @@ TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_128) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_30528) {
|
||||
test_apex_reduce_sum(4 * 128, 30528);
|
||||
test_apex_reduce_sum(4 * 512, 30528);
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ TEST(GatherOpTest, Gather_invalid_index_cpu) {
|
|||
{kCudaExecutionProvider, kOpenVINOExecutionProvider, kDnnlExecutionProvider, kNupharExecutionProvider, kTensorrtExecutionProvider});
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(GatherOpTest, Gather_invalid_index_gpu) {
|
||||
OpTester test("Gather");
|
||||
// Invalid index 3. data[3] does not exist.
|
||||
|
|
|
|||
|
|
@ -2082,7 +2082,7 @@ TEST(GradientUtilsTest, InPlaceAccumulatorFloat32) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(GradientUtilsTest, InPlaceAccumulatorFloat16) {
|
||||
OpTester test("InPlaceAccumulator", 1, onnxruntime::kMSDomain);
|
||||
|
||||
|
|
@ -2113,7 +2113,7 @@ TEST(GradientUtilsTest, ZeroGradientFloat32) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(GradientUtilsTest, ZeroGradientFloat16) {
|
||||
OpTester test("ZeroGradient", 1, onnxruntime::kMSDomain);
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,13 @@
|
|||
|
||||
#include "orttraining/training_ops/cpu/controlflow/event_pool.h" // TODO: move with PipelineBatchPlanner
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
#include "bert_toy_fetches.h"
|
||||
#ifdef USE_CUDA
|
||||
#include "core/providers/cuda/cuda_execution_provider.h"
|
||||
#elif USE_ROCM
|
||||
#include "core/providers/rocm/rocm_execution_provider.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace onnxruntime::logging;
|
||||
|
|
@ -299,7 +303,7 @@ TEST(GradientGraphBuilderTest, TrainingSession_WithProfiler) {
|
|||
ASSERT_TRUE(count > 1);
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
static void RunBertTrainingWithChecks(
|
||||
const SessionOptions& so,
|
||||
const PathString& backprop_model_file) {
|
||||
|
|
@ -316,9 +320,13 @@ static void RunBertTrainingWithChecks(
|
|||
auto model_metadata = res.second;
|
||||
std::cout << "Loaded " << model_metadata->graph_name << '\n';
|
||||
|
||||
#ifdef USE_CUDA
|
||||
CUDAExecutionProviderInfo xp_info;
|
||||
ASSERT_STATUS_OK(training_session->RegisterExecutionProvider(onnxruntime::make_unique<CUDAExecutionProvider>(xp_info)));
|
||||
|
||||
#elif USE_ROCM
|
||||
ROCMExecutionProviderInfo xp_info;
|
||||
ASSERT_STATUS_OK(training_session->RegisterExecutionProvider(onnxruntime::make_unique<ROCMExecutionProvider>(xp_info)));
|
||||
#endif
|
||||
ASSERT_STATUS_OK(training_session->Initialize());
|
||||
|
||||
RunOptions run_options;
|
||||
|
|
@ -494,7 +502,7 @@ TEST(GradientGraphBuilderTest, TrainingSession_BertToy) {
|
|||
PathString backprop_model_file;
|
||||
ASSERT_STATUS_OK(BuildBackPropGraph(model_path, config, backprop_model_file));
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
SessionOptions so;
|
||||
RunBertTrainingWithChecks(so, backprop_model_file);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(IsFiniteTest, Float) {
|
||||
OpTester test("IsFinite", 1, kMSDomain);
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
|
||||
void test_all_1d_true(size_t size) {
|
||||
std::unique_ptr<bool[]> p_data(new bool[size]);
|
||||
|
|
@ -103,7 +103,7 @@ TEST_P(ReductionOpTest, ReduceAllL2) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST_P(ReductionOpTest, ReduceAllL2HalfHalf) {
|
||||
OpTester test("ReduceAllL2", 1, onnxruntime::kMSDomain, true);
|
||||
test.SetDeterminism(GetParam());
|
||||
|
|
@ -345,7 +345,7 @@ TEST(ReductionOpTest, ReduceSumTraining_neg_axis) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
TEST(ReductionOpTest, ReduceSumTrainingHalfHalf) {
|
||||
OpTester test("ReduceSumTraining", 1, onnxruntime::kMSDomain);
|
||||
test.AddAttribute("keepdims", (int64_t)0);
|
||||
|
|
|
|||
|
|
@ -97,13 +97,14 @@ void RunGatherGradTestWithRandomData(
|
|||
}
|
||||
} // namespace
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#if defined(USE_CUDA) || defined(USE_ROCM)
|
||||
//TODO: This test currently cannot pass CI due to a GPU architecture problem
|
||||
TEST(GatherOpTest, Gather_axis0_indices2d_half) {
|
||||
#ifdef USE_CUDA
|
||||
if (NeedSkipIfCudaArchLowerThan(700)) {
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
OpTester test("Gather");
|
||||
test.AddAttribute<int64_t>("axis", 0LL);
|
||||
test.AddInput<MLFloat16>("data", {3, 3},
|
||||
|
|
|
|||
Loading…
Reference in a new issue