Re-enable some of the recently disabled cuda tests (#7873)

This commit is contained in:
Ryan Hill 2021-06-03 14:28:30 -07:00 committed by GitHub
parent a118da160d
commit 8f8b9302a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 66 additions and 97 deletions

View file

@ -1,13 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here
#include "test/framework/TestAllocatorManager.h"
#include "core/framework/allocatormgr.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_allocator.h"
#endif // USE_CUDA
namespace onnxruntime {
namespace test {
@ -99,14 +94,6 @@ AllocatorManager::AllocatorManager() {
// Registers this manager's allocators into map_ through RegisterAllocator:
// the CPU allocator unconditionally and, only when USE_CUDA is defined, the
// CUDA and CUDA-pinned allocators created for device id 0.
// Each registration is wrapped in ORT_RETURN_IF_ERROR (presumably an early
// return of the failing Status - confirm against the macro definition);
// if every registration succeeds the function returns Status::OK().
Status AllocatorManager::InitializeAllocators() {
  // All registrations pass the same size_t maximum as the third argument.
  const size_t size_max = std::numeric_limits<size_t>::max();

  std::unique_ptr<CPUAllocator> cpu_allocator = std::make_unique<CPUAllocator>();
  ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cpu_allocator), size_max, true));

#ifdef USE_CUDA
  // Both CUDA allocators are created for the same device id (0).
  const auto device_zero = static_cast<OrtDevice::DeviceId>(0);

  std::unique_ptr<CUDAAllocator> cuda_allocator =
      std::make_unique<CUDAAllocator>(device_zero, CUDA);
  ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cuda_allocator), size_max, true));

  std::unique_ptr<CUDAPinnedAllocator> cuda_pinned_allocator =
      std::make_unique<CUDAPinnedAllocator>(device_zero, CUDA_PINNED);
  ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cuda_pinned_allocator), size_max, true));
#endif  // USE_CUDA

  return Status::OK();
}

View file

@ -1,6 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#if 0 // TODO: Can't call these directly from external code as Cuda is now a shared library
#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"
@ -15,24 +14,25 @@
#include "core/framework/execution_provider.h"
#include "core/framework/op_kernel.h"
#include "core/framework/session_state.h"
#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph_viewer.h"
#include "core/graph/model.h"
#include "core/graph/op.h"
#include "core/providers/cuda/cuda_execution_provider.h"
#include "core/providers/cpu/math/element_wise_ops.h"
#include "core/framework/tensorprotoutils.h"
#include "test/capturing_sink.h"
#include "test/test_environment.h"
#include "test/framework/test_utils.h"
#include "gtest/gtest.h"
#include "core/util/protobuf_parsing_utils.h"
#include "test/providers/provider_test_utils.h"
#include "default_providers.h"
#include "asserts.h"
using namespace std;
using namespace ONNX_NAMESPACE;
using namespace onnxruntime::logging;
namespace onnxruntime {
namespace test {
typedef std::vector<onnxruntime::NodeArg*> ArgMap;
@ -121,8 +121,7 @@ TEST(CUDAFenceTests, DISABLED_PartOnCPU) {
SessionOptions so;
FenceCudaTestInferenceSession session(so, GetEnvironment());
LoadInferenceSessionFromModel(session, *model);
CUDAExecutionProviderInfo xp_info;
ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(xp_info)));
ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
ASSERT_TRUE(session.Initialize().IsOK());
ASSERT_TRUE(1 == CountCopyNodes(graph));
@ -176,8 +175,7 @@ TEST(CUDAFenceTests, TileWithInitializer) {
SessionOptions so;
FenceCudaTestInferenceSession session(so, GetEnvironment());
LoadInferenceSessionFromModel(session, *model);
CUDAExecutionProviderInfo xp_info;
ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(xp_info)));
ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
ASSERT_STATUS_OK(session.Initialize());
vector<OrtValue> outputs;
@ -242,8 +240,7 @@ TEST(CUDAFenceTests, TileWithComputedInput) {
SessionOptions so;
FenceCudaTestInferenceSession session(so, GetEnvironment());
LoadInferenceSessionFromModel(session, *model);
CUDAExecutionProviderInfo xp_info;
ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(xp_info)));
ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
ASSERT_TRUE(session.Initialize().IsOK());
vector<OrtValue> outputs;
@ -263,4 +260,3 @@ TEST(CUDAFenceTests, TileWithComputedInput) {
} // namespace test
} // namespace onnxruntime
#endif

View file

@ -1,6 +1,5 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here
#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"
@ -32,6 +31,7 @@
#include "core/platform/env.h"
#include "core/providers/cpu/cpu_execution_provider.h"
#include "core/providers/cpu/math/element_wise_ops.h"
#include "core/providers/cuda/cuda_provider_factory.h"
#ifdef USE_CUDA
#include "core/providers/cuda/gpu_data_transfer.h"
#elif USE_ROCM
@ -66,6 +66,11 @@ struct KernelRegistryAndStatus {
};
} // namespace
namespace onnxruntime {
#ifdef USE_CUDA
ProviderInfo_CUDA* GetProviderInfo_CUDA();
#endif
class FuseAdd : public OpKernel {
public:
explicit FuseAdd(const OpKernelInfo& info) : OpKernel(info) {
@ -260,6 +265,7 @@ void RunModelWithBindingMatMul(InferenceSession& session_object,
ProviderType bind_provider_type,
bool is_preallocate_output_vec,
ProviderType allocation_provider,
IExecutionProvider *gpu_provider,
OrtDevice* output_device) {
unique_ptr<IOBinding> io_binding;
Status st = session_object.NewIOBinding(&io_binding);
@ -307,16 +313,8 @@ void RunModelWithBindingMatMul(InferenceSession& session_object,
if (allocation_provider == kCpuExecutionProvider) {
AllocateMLValue<float>(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims,
&output_ml_value);
} else if (allocation_provider == kCudaExecutionProvider) {
#ifdef USE_CUDA
AllocateMLValue<float>(TestCudaExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims,
&output_ml_value);
#endif
} else if (allocation_provider == kRocmExecutionProvider) {
#ifdef USE_ROCM
AllocateMLValue<float>(TestRocmExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims,
&output_ml_value);
#endif
} else if (allocation_provider == kCudaExecutionProvider || allocation_provider == kRocmExecutionProvider) {
AllocateMLValue<float>(gpu_provider->GetAllocator(0, OrtMemTypeDefault), expected_output_dims, &output_ml_value);
} else {
ORT_THROW("Unsupported provider");
}
@ -354,11 +352,12 @@ void RunModelWithBindingMatMul(InferenceSession& session_object,
shape,
cpu_allocator);
#ifdef USE_CUDA
cudaStream_t stream = static_cast<cudaStream_t>(static_cast<const onnxruntime::CUDAExecutionProvider*>(TestCudaExecutionProvider())->GetComputeStream());
cudaStream_t stream = static_cast<cudaStream_t>(gpu_provider->GetComputeStream());
st = GetProviderInfo_CUDA()->CreateGPUDataTransfer(stream)->CopyTensor(rtensor, *cpu_tensor.get(), 0);
#elif USE_ROCM
hipStream_t stream = static_cast<hipStream_t>(static_cast<const onnxruntime::ROCMExecutionProvider*>(TestRocmExecutionProvider())->GetComputeStream());
#endif
hipStream_t stream = static_cast<hipStream_t>(gpu_provider->GetComputeStream());
st = GPUDataTransfer(stream).CopyTensor(rtensor, *cpu_tensor.get(), 0);
#endif
ASSERT_TRUE(st.IsOK());
OrtValue ml_value;
ml_value.Init(cpu_tensor.release(),
@ -367,14 +366,8 @@ void RunModelWithBindingMatMul(InferenceSession& session_object,
VerifyOutputs({ml_value}, expected_output_dims, expected_values_mul_y);
#endif
} else {
if (allocation_provider == kCudaExecutionProvider) {
#ifdef USE_CUDA
TestCudaExecutionProvider()->Sync();
#endif
} else if (allocation_provider == kRocmExecutionProvider) {
#ifdef USE_ROCM
TestRocmExecutionProvider()->Sync();
#endif
if (allocation_provider == kCudaExecutionProvider || allocation_provider == kRocmExecutionProvider) {
gpu_provider->Sync();
}
VerifyOutputs(io_binding->GetOutputs(), expected_output_dims, expected_values_mul_y);
}
@ -622,9 +615,7 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions) {
InferenceSession session_object(so, GetEnvironment());
#ifdef USE_CUDA
CUDAExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
#endif
ASSERT_STATUS_OK(session_object.Load(MODEL_URI));
ASSERT_STATUS_OK(session_object.Initialize());
@ -858,16 +849,21 @@ static void TestBindHelper(const std::string& log_str,
so.session_log_verbosity_level = 1; // change to 1 for detailed logging
InferenceSession session_object{so, GetEnvironment()};
IExecutionProvider *gpu_provider{};
if (bind_provider_type == kCudaExecutionProvider || bind_provider_type == kRocmExecutionProvider) {
#ifdef USE_CUDA
CUDAExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
auto provider = DefaultCudaExecutionProvider();
gpu_provider = provider.get();
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(provider)));
#elif USE_ROCM
ROCMExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<ROCMExecutionProvider>(epi)).IsOK());
auto provider = std::make_unique<ROCMExecutionProvider>(epi);
gpu_provider = provider.get();
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(provider)));
#endif
}
@ -889,6 +885,7 @@ static void TestBindHelper(const std::string& log_str,
bind_provider_type,
preallocate_output,
allocation_provider,
gpu_provider,
output_device);
}
@ -1481,13 +1478,11 @@ TEST(InferenceSessionTests, Test3LayerNestedSubgraph) {
InferenceSession session_object{so, GetEnvironment()};
#ifdef USE_CUDA
CUDAExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
#elif USE_ROCM
ROCMExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<ROCMExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique<ROCMExecutionProvider>(epi)));
#endif
status = session_object.Load(model_file_name);
@ -1621,13 +1616,11 @@ TEST(InferenceSessionTests, Test2LayerNestedSubgraph) {
InferenceSession session_object{so, GetEnvironment()};
#ifdef USE_CUDA
CUDAExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
#elif USE_ROCM
ROCMExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<ROCMExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique<ROCMExecutionProvider>(epi)));
#endif
status = session_object.Load(model_file_name);
@ -1989,9 +1982,7 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) {
so.session_logid = "InferenceSessionTests.TestParallelExecutionWithCudaProvider";
InferenceSession session_object{so, GetEnvironment()};
CUDAExecutionProviderInfo epi;
epi.device_id = 0;
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider()));
ASSERT_STATUS_OK(session_object.Load(model_uri));
@ -2012,12 +2003,13 @@ TEST(InferenceSessionTests, TestArenaShrinkageAfterRun) {
SessionOptions so;
InferenceSession session_object{so, GetEnvironment()};
CUDAExecutionProviderInfo epi;
epi.default_memory_arena_cfg = &arena_cfg;
OrtCUDAProviderOptions provider_options{};
provider_options.default_memory_arena_cfg = &arena_cfg;
provider_options.device_id = 0;
auto factory = CreateExecutionProviderFactory_Cuda(&provider_options);
epi.device_id = 0;
ASSERT_STATUS_OK(session_object.Load(MODEL_URI));
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique<CUDAExecutionProvider>(epi)).IsOK());
ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(factory->CreateProvider()));
ASSERT_STATUS_OK(session_object.Initialize());
// Fetch the CUDA allocator to analyze its stats

View file

@ -1,12 +1,12 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here
#include <iterator>
#include "core/framework/execution_providers.h"
#include "core/optimizer/transformer_memcpy.h"
#include "core/graph/model.h"
#include "default_providers.h"
#include "gtest/gtest.h"
#include "test_utils.h"
#include "test/test_environment.h"
@ -106,8 +106,7 @@ TEST(TransformerTest, MemcpyTransformerTest) {
KernelRegistryManager kernel_registry_manager;
ExecutionProviders execution_providers;
execution_providers.Add(onnxruntime::kCudaExecutionProvider,
std::make_unique<CUDAExecutionProvider>(CUDAExecutionProviderInfo()));
execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider());
execution_providers.Add(onnxruntime::kCpuExecutionProvider,
std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo()));
KernelRegistryManager test_registry_manager;
@ -162,8 +161,7 @@ TEST(TransformerTest, MemcpyTransformerTestCudaFirst) {
KernelRegistryManager kernel_registry_manager;
ExecutionProviders execution_providers;
execution_providers.Add(onnxruntime::kCudaExecutionProvider,
std::make_unique<CUDAExecutionProvider>(CUDAExecutionProviderInfo()));
execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider());
execution_providers.Add(onnxruntime::kCpuExecutionProvider,
std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo()));
KernelRegistryManager test_registry_manager;
@ -277,8 +275,7 @@ TEST(TransformerTest, TestCopyNodeInsertionInitializerInSubgraph) {
KernelRegistryManager kernel_registry_manager;
ExecutionProviders execution_providers;
execution_providers.Add(onnxruntime::kCudaExecutionProvider,
std::make_unique<CUDAExecutionProvider>(CUDAExecutionProviderInfo()));
execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider());
execution_providers.Add(onnxruntime::kCpuExecutionProvider,
std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo()));
KernelRegistryManager test_registry_manager;

View file

@ -1,12 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include <memory>
#include "default_providers.h"
#include "providers.h"
#include "core/providers/cpu/cpu_provider_factory_creator.h"
#ifdef USE_CUDA
#include "core/providers/cuda/cuda_provider_factory_creator.h"
#endif
#ifdef USE_ROCM
#include "core/providers/rocm/rocm_provider_factory_creator.h"
#endif
@ -16,26 +14,6 @@
#include "core/session/onnxruntime_cxx_api.h"
namespace onnxruntime {
// Forward declarations of the per-provider factory creation functions.
// Only the signatures are visible here; every one of them returns a
// std::shared_ptr<IExecutionProviderFactory>. The definitions live elsewhere.
// NOTE(review): unnamed parameters (e.g. the uint32_t for Nnapi/CoreML) are
// presumably provider-specific flags - confirm against the definitions.

// OpenVINO overload taking the individual settings as separate arguments.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
// OpenVINO overload taking the settings bundled in an options struct.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(uint32_t);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ACL(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ArmNN(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CoreML(uint32_t);
// EP for internal testing
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_InternalTesting(
const std::unordered_set<std::string>& supported_ops);
namespace test {
std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_arena) {

View file

@ -1,9 +1,28 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/providers/providers.h"
#include "core/framework/execution_provider.h"
namespace onnxruntime {
// Forward declarations of the per-provider factory creation functions, listed
// alphabetically by provider name. Only the signatures are visible here; every
// one of them returns a std::shared_ptr<IExecutionProviderFactory>.
// NOTE(review): unnamed parameters (e.g. the uint32_t for CoreML/Nnapi) are
// presumably provider-specific flags - confirm against the definitions.
// NOTE(review): this header uses std::shared_ptr, std::string and
// std::unordered_set; presumably the two includes above pull in <memory>,
// <string> and <unordered_set> transitively - confirm.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ACL(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ArmNN(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CoreML(uint32_t);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(uint32_t);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
// OpenVINO overload taking the individual settings as separate arguments.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
// OpenVINO overload taking the settings bundled in an options struct.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
// EP for internal testing
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_InternalTesting(const std::unordered_set<std::string>& supported_ops);
namespace test {
// unique_ptr providers with default values for session registration