diff --git a/onnxruntime/test/framework/TestAllocatorManager.cc b/onnxruntime/test/framework/TestAllocatorManager.cc index a4b9058938..be7e842037 100644 --- a/onnxruntime/test/framework/TestAllocatorManager.cc +++ b/onnxruntime/test/framework/TestAllocatorManager.cc @@ -1,13 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here - #include "test/framework/TestAllocatorManager.h" #include "core/framework/allocatormgr.h" -#ifdef USE_CUDA -#include "core/providers/cuda/cuda_allocator.h" -#endif // USE_CUDA namespace onnxruntime { namespace test { @@ -99,14 +94,6 @@ AllocatorManager::AllocatorManager() { Status AllocatorManager::InitializeAllocators() { auto cpu_alocator = std::make_unique(); ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cpu_alocator), std::numeric_limits::max(), true)); -#ifdef USE_CUDA - auto cuda_alocator = std::make_unique(static_cast(0), CUDA); - ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cuda_alocator), std::numeric_limits::max(), true)); - - auto cuda_pinned_alocator = std::make_unique(static_cast(0), CUDA_PINNED); - ORT_RETURN_IF_ERROR(RegisterAllocator(map_, std::move(cuda_pinned_alocator), std::numeric_limits::max(), true)); -#endif // USE_CUDA - return Status::OK(); } diff --git a/onnxruntime/test/framework/cuda/fence_cuda_test.cc b/onnxruntime/test/framework/cuda/fence_cuda_test.cc index e04b101c65..dac4c77b41 100644 --- a/onnxruntime/test/framework/cuda/fence_cuda_test.cc +++ b/onnxruntime/test/framework/cuda/fence_cuda_test.cc @@ -1,6 +1,5 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#if 0 // TODO: Can't call these directly from external code as Cuda is now a shared library #include "core/graph/onnx_protobuf.h" #include "core/session/inference_session.h" @@ -15,24 +14,25 @@ #include "core/framework/execution_provider.h" #include "core/framework/op_kernel.h" #include "core/framework/session_state.h" +#include "core/framework/tensorprotoutils.h" #include "core/graph/graph_viewer.h" #include "core/graph/model.h" #include "core/graph/op.h" -#include "core/providers/cuda/cuda_execution_provider.h" #include "core/providers/cpu/math/element_wise_ops.h" -#include "core/framework/tensorprotoutils.h" #include "test/capturing_sink.h" #include "test/test_environment.h" #include "test/framework/test_utils.h" #include "gtest/gtest.h" #include "core/util/protobuf_parsing_utils.h" #include "test/providers/provider_test_utils.h" +#include "default_providers.h" #include "asserts.h" using namespace std; using namespace ONNX_NAMESPACE; using namespace onnxruntime::logging; +namespace onnxruntime { namespace test { typedef std::vector ArgMap; @@ -121,8 +121,7 @@ TEST(CUDAFenceTests, DISABLED_PartOnCPU) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); LoadInferenceSessionFromModel(session, *model); - CUDAExecutionProviderInfo xp_info; - ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique(xp_info))); + ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_TRUE(session.Initialize().IsOK()); ASSERT_TRUE(1 == CountCopyNodes(graph)); @@ -176,8 +175,7 @@ TEST(CUDAFenceTests, TileWithInitializer) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); LoadInferenceSessionFromModel(session, *model); - CUDAExecutionProviderInfo xp_info; - ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique(xp_info))); + ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(session.Initialize()); vector outputs; @@ -242,8 +240,7 @@ TEST(CUDAFenceTests, TileWithComputedInput) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); LoadInferenceSessionFromModel(session, *model); - CUDAExecutionProviderInfo xp_info; - ASSERT_STATUS_OK(session.RegisterExecutionProvider(std::make_unique(xp_info))); + ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_TRUE(session.Initialize().IsOK()); vector outputs; @@ -263,4 +260,3 @@ TEST(CUDAFenceTests, TileWithComputedInput) { } // namespace test } // namespace onnxruntime -#endif diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 25a0e0a272..755f25c2cb 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -1,6 +1,5 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here #include "core/graph/onnx_protobuf.h" #include "core/session/inference_session.h" @@ -32,6 +31,7 @@ #include "core/platform/env.h" #include "core/providers/cpu/cpu_execution_provider.h" #include "core/providers/cpu/math/element_wise_ops.h" +#include "core/providers/cuda/cuda_provider_factory.h" #ifdef USE_CUDA #include "core/providers/cuda/gpu_data_transfer.h" #elif USE_ROCM @@ -66,6 +66,11 @@ struct KernelRegistryAndStatus { }; } // namespace namespace onnxruntime { + +#ifdef USE_CUDA +ProviderInfo_CUDA* GetProviderInfo_CUDA(); +#endif + class FuseAdd : public OpKernel { public: explicit FuseAdd(const OpKernelInfo& info) : OpKernel(info) { @@ -260,6 +265,7 @@ void RunModelWithBindingMatMul(InferenceSession& session_object, ProviderType bind_provider_type, bool is_preallocate_output_vec, ProviderType allocation_provider, + IExecutionProvider *gpu_provider, OrtDevice* output_device) { unique_ptr io_binding; Status st = session_object.NewIOBinding(&io_binding); @@ -307,16 +313,8 @@ void RunModelWithBindingMatMul(InferenceSession& session_object, if (allocation_provider == kCpuExecutionProvider) { AllocateMLValue(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims, &output_ml_value); - } else if (allocation_provider == kCudaExecutionProvider) { -#ifdef USE_CUDA - AllocateMLValue(TestCudaExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims, - &output_ml_value); -#endif - } else if (allocation_provider == kRocmExecutionProvider) { -#ifdef USE_ROCM - AllocateMLValue(TestRocmExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), expected_output_dims, - &output_ml_value); -#endif + } else if (allocation_provider == kCudaExecutionProvider || allocation_provider == kRocmExecutionProvider) { + AllocateMLValue(gpu_provider->GetAllocator(0, OrtMemTypeDefault), expected_output_dims, &output_ml_value); } else { ORT_THROW("Unsupported provider"); } @@ -354,11 +352,12 @@ void RunModelWithBindingMatMul(InferenceSession& session_object, shape, cpu_allocator); #ifdef USE_CUDA - cudaStream_t stream = static_cast(static_cast(TestCudaExecutionProvider())->GetComputeStream()); + cudaStream_t stream = static_cast(gpu_provider->GetComputeStream()); + st = GetProviderInfo_CUDA()->CreateGPUDataTransfer(stream)->CopyTensor(rtensor, *cpu_tensor.get(), 0); #elif USE_ROCM - hipStream_t stream = static_cast(static_cast(TestRocmExecutionProvider())->GetComputeStream()); -#endif + hipStream_t stream = static_cast(gpu_provider->GetComputeStream()); st = GPUDataTransfer(stream).CopyTensor(rtensor, *cpu_tensor.get(), 0); +#endif ASSERT_TRUE(st.IsOK()); OrtValue ml_value; ml_value.Init(cpu_tensor.release(), @@ -367,14 +366,8 @@ void RunModelWithBindingMatMul(InferenceSession& session_object, VerifyOutputs({ml_value}, expected_output_dims, expected_values_mul_y); #endif } else { - if (allocation_provider == kCudaExecutionProvider) { -#ifdef USE_CUDA - TestCudaExecutionProvider()->Sync(); -#endif - } else if (allocation_provider == kRocmExecutionProvider) { -#ifdef USE_ROCM - TestRocmExecutionProvider()->Sync(); -#endif + if (allocation_provider == kCudaExecutionProvider || allocation_provider == kRocmExecutionProvider) { + gpu_provider->Sync(); } VerifyOutputs(io_binding->GetOutputs(), expected_output_dims, expected_values_mul_y); } @@ -622,9 +615,7 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions) { InferenceSession session_object(so, GetEnvironment()); #ifdef USE_CUDA - CUDAExecutionProviderInfo epi; - epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #endif ASSERT_STATUS_OK(session_object.Load(MODEL_URI)); ASSERT_STATUS_OK(session_object.Initialize()); @@ -858,16 +849,21 @@ static void TestBindHelper(const std::string& log_str, so.session_log_verbosity_level = 1; // change to 1 for detailed logging InferenceSession session_object{so, GetEnvironment()}; + IExecutionProvider *gpu_provider{}; if (bind_provider_type == kCudaExecutionProvider || bind_provider_type == kRocmExecutionProvider) { #ifdef USE_CUDA - CUDAExecutionProviderInfo epi; - epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + auto provider = DefaultCudaExecutionProvider(); + gpu_provider = provider.get(); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(provider))); #elif USE_ROCM ROCMExecutionProviderInfo epi; epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + + auto provider = std::make_unique(epi); + gpu_provider = provider.get(); + + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(provider))); #endif } @@ -889,6 +885,7 @@ static void TestBindHelper(const std::string& log_str, bind_provider_type, preallocate_output, allocation_provider, + gpu_provider, output_device); } @@ -1481,13 +1478,11 @@ TEST(InferenceSessionTests, Test3LayerNestedSubgraph) { InferenceSession session_object{so, GetEnvironment()}; #ifdef USE_CUDA - CUDAExecutionProviderInfo epi; - epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #elif USE_ROCM ROCMExecutionProviderInfo epi; epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique(epi))); #endif status = session_object.Load(model_file_name); @@ -1621,13 +1616,11 @@ TEST(InferenceSessionTests, Test2LayerNestedSubgraph) { InferenceSession session_object{so, GetEnvironment()}; #ifdef USE_CUDA - CUDAExecutionProviderInfo epi; - epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #elif USE_ROCM ROCMExecutionProviderInfo epi; epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::make_unique(epi))); #endif status = session_object.Load(model_file_name); @@ -1989,9 +1982,7 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) { so.session_logid = "InferenceSessionTests.TestParallelExecutionWithCudaProvider"; InferenceSession session_object{so, GetEnvironment()}; - CUDAExecutionProviderInfo epi; - epi.device_id = 0; - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(session_object.Load(model_uri)); @@ -2012,12 +2003,13 @@ TEST(InferenceSessionTests, TestArenaShrinkageAfterRun) { SessionOptions so; InferenceSession session_object{so, GetEnvironment()}; - CUDAExecutionProviderInfo epi; - epi.default_memory_arena_cfg = &arena_cfg; + OrtCUDAProviderOptions provider_options{}; + provider_options.default_memory_arena_cfg = &arena_cfg; + provider_options.device_id = 0; + auto factory = CreateExecutionProviderFactory_Cuda(&provider_options); - epi.device_id = 0; ASSERT_STATUS_OK(session_object.Load(MODEL_URI)); - EXPECT_TRUE(session_object.RegisterExecutionProvider(std::make_unique(epi)).IsOK()); + ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(factory->CreateProvider())); ASSERT_STATUS_OK(session_object.Initialize()); // Fetch the CUDA allocator to analyze its stats diff --git a/onnxruntime/test/framework/memcpy_transformer_test.cc b/onnxruntime/test/framework/memcpy_transformer_test.cc index 7b08f9df2c..5b8a786363 100644 --- a/onnxruntime/test/framework/memcpy_transformer_test.cc +++ b/onnxruntime/test/framework/memcpy_transformer_test.cc @@ -1,12 +1,12 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#undef USE_CUDA // TODO: Cuda is a shared library, so can't call any Cuda provider methods directly from here #include #include "core/framework/execution_providers.h" #include "core/optimizer/transformer_memcpy.h" #include "core/graph/model.h" +#include "default_providers.h" #include "gtest/gtest.h" #include "test_utils.h" #include "test/test_environment.h" @@ -106,8 +106,7 @@ TEST(TransformerTest, MemcpyTransformerTest) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; - execution_providers.Add(onnxruntime::kCudaExecutionProvider, - std::make_unique(CUDAExecutionProviderInfo())); + execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider()); execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo())); KernelRegistryManager test_registry_manager; @@ -162,8 +161,7 @@ TEST(TransformerTest, MemcpyTransformerTestCudaFirst) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; - execution_providers.Add(onnxruntime::kCudaExecutionProvider, - std::make_unique(CUDAExecutionProviderInfo())); + execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider()); execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo())); KernelRegistryManager test_registry_manager; @@ -277,8 +275,7 @@ TEST(TransformerTest, TestCopyNodeInsertionInitializerInSubgraph) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; - execution_providers.Add(onnxruntime::kCudaExecutionProvider, - std::make_unique(CUDAExecutionProviderInfo())); + execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider()); execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo())); KernelRegistryManager test_registry_manager; diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 9418bda827..9d726d7a5c 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -1,12 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include #include "default_providers.h" #include "providers.h" #include "core/providers/cpu/cpu_provider_factory_creator.h" -#ifdef USE_CUDA -#include "core/providers/cuda/cuda_provider_factory_creator.h" -#endif #ifdef USE_ROCM #include "core/providers/rocm/rocm_provider_factory_creator.h" #endif @@ -16,26 +14,6 @@ #include "core/session/onnxruntime_cxx_api.h" namespace onnxruntime { - -std::shared_ptr CreateExecutionProviderFactory_OpenVINO( - const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path); - -std::shared_ptr CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options); -std::shared_ptr CreateExecutionProviderFactory_Dnnl(int use_arena); -std::shared_ptr CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params); -std::shared_ptr CreateExecutionProviderFactory_Nuphar(bool, const char*); -std::shared_ptr CreateExecutionProviderFactory_Nnapi(uint32_t); -std::shared_ptr CreateExecutionProviderFactory_Rknpu(); -std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params); -std::shared_ptr CreateExecutionProviderFactory_MIGraphX(int device_id); -std::shared_ptr CreateExecutionProviderFactory_ACL(int use_arena); -std::shared_ptr CreateExecutionProviderFactory_ArmNN(int use_arena); -std::shared_ptr CreateExecutionProviderFactory_CoreML(uint32_t); - -// EP for internal testing -std::shared_ptr CreateExecutionProviderFactory_InternalTesting( - const std::unordered_set& supported_ops); - namespace test { std::unique_ptr DefaultCpuExecutionProvider(bool enable_arena) { diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 76d55cae93..15c77b585d 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -1,9 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #pragma once +#include "core/providers/providers.h" #include "core/framework/execution_provider.h" namespace onnxruntime { + +std::shared_ptr CreateExecutionProviderFactory_ACL(int use_arena); +std::shared_ptr CreateExecutionProviderFactory_ArmNN(int use_arena); +std::shared_ptr CreateExecutionProviderFactory_CoreML(uint32_t); +std::shared_ptr CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options); +std::shared_ptr CreateExecutionProviderFactory_Dnnl(int use_arena); +std::shared_ptr CreateExecutionProviderFactory_MIGraphX(int device_id); +std::shared_ptr CreateExecutionProviderFactory_Nnapi(uint32_t); +std::shared_ptr CreateExecutionProviderFactory_Nuphar(bool, const char*); +std::shared_ptr CreateExecutionProviderFactory_OpenVINO( + const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path); +std::shared_ptr CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params); +std::shared_ptr CreateExecutionProviderFactory_Rknpu(); +std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params); + +// EP for internal testing +std::shared_ptr CreateExecutionProviderFactory_InternalTesting(const std::unordered_set& supported_ops); + namespace test { // unique_ptr providers with default values for session registration