diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 56e3181921..f8b4aed4e0 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -715,6 +715,7 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) target_link_libraries(onnxruntime_test_all PRIVATE onnxruntime_language_interop onnxruntime_pyop) endif() if (onnxruntime_USE_ROCM) + target_compile_options(onnxruntime_test_all PRIVATE -D__HIP_PLATFORM_HCC__=1) target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) endif() if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index 27d06e2ed0..565cba2b49 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -38,8 +38,6 @@ constexpr const char* CPU = "Cpu"; constexpr const char* CUDA = "Cuda"; constexpr const char* CUDA_PINNED = "CudaPinned"; constexpr const char* DML = "DML"; -constexpr const char* MIGRAPHX = "MIGraphX"; -constexpr const char* MIGRAPHX_PINNED = "MIGraphXPinned"; constexpr const char* OpenVINO_CPU = "OpenVINO_CPU"; constexpr const char* OpenVINO_GPU = "OpenVINO_GPU"; diff --git a/onnxruntime/core/optimizer/transformer_memcpy.cc b/onnxruntime/core/optimizer/transformer_memcpy.cc index aed760aa05..93370904e5 100644 --- a/onnxruntime/core/optimizer/transformer_memcpy.cc +++ b/onnxruntime/core/optimizer/transformer_memcpy.cc @@ -236,7 +236,8 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg else provider_output_defs_.insert(arg); } - } else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider) { + } else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider && + node_provider_type != kRocmExecutionProvider && node_provider_type != kMIGraphXExecutionProvider) { // TODO: copy between devices? i.e. multiple GPUs if (node_provider_type != onnxruntime::kCpuExecutionProvider && node_provider_type != onnxruntime::kVitisAIExecutionProvider && diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 3ec5a55c08..b5cd16db86 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -102,9 +102,9 @@ std::shared_ptr MIGraphXExecutionProvider::GetKernelRegistry() c } MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProviderInfo& info) - : IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true} { + : IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true}, device_id_(info.device_id) { // Set GPU device to be used - HIP_CALL_THROW(hipSetDevice(info.device_id)); + HIP_CALL_THROW(hipSetDevice(device_id_)); t_ = migraphx::target(info.target_device.c_str()); // whether fp16 is enable @@ -128,7 +128,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptrGetAllocator(device_id_, OrtMemTypeDefault); if (nullptr == allocator_) { AllocatorCreationInfo default_memory_info( - [](OrtDevice::DeviceId device_id) { return CreateHIPAllocator(device_id, onnxruntime::MIGRAPHX); }, device_id_); + [](OrtDevice::DeviceId device_id) { return CreateROCMAllocator(device_id, onnxruntime::CUDA); }, device_id_); allocator_ = CreateAllocator(default_memory_info); allocator_manager->InsertAllocator(allocator_); } @@ -141,7 +141,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptr MIGraphXProviderFactory::CreateProvider() { return std::make_unique(info_); } -// std::shared_ptr CreateExecutionProviderFactory_MIGraphX(int device_id) { -// MIGraphXExecutionProviderInfo info; -// info.device_id = device_id; -// return std::make_shared(info); -// } - std::shared_ptr CreateExecutionProviderFactory_MIGraphX(const MIGraphXExecutionProviderInfo& info) { return std::make_shared(info); } - -struct ProviderInfo_MIGRAPHX_Impl : ProviderInfo_MIGRAPHX { - std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name) override { - return std::make_unique(device_id, name); - } - - std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) override { - return std::make_unique(device_id, name); - } - - std::unique_ptr CreateGPUDataTransfer(void* stream) override { - return std::make_unique(static_cast(stream)); - } -} g_info; - struct MIGraphX_Provider : Provider { - void* GetInfo() override { return &g_info; } - std::shared_ptr CreateExecutionProviderFactory(int device_id) override { MIGraphXExecutionProviderInfo info; info.device_id = device_id; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h index ac4aaedf20..708fe0803a 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h @@ -10,11 +10,5 @@ struct IExecutionProviderFactory; struct MIGraphXExecutionProviderInfo; enum class ArenaExtendStrategy : int32_t; struct MIGraphXExecutionProviderExternalAllocatorInfo; - -struct ProviderInfo_MIGRAPHX { - virtual std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name) = 0; - virtual std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0; - virtual std::unique_ptr CreateGPUDataTransfer(void* stream) = 0; -}; } diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index 881987b4b8..78b5158944 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -243,8 +243,8 @@ std::unique_ptr CreateCPUAllocator(const OrtMemoryInfo& memory_info) std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name); std::unique_ptr CreateCUDAPinnedAllocator(int16_t device_id, const char* name); -std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name); -std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name); +std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name); +std::unique_ptr CreateROCMPinnedAllocator(int16_t device_id, const char* name); std::unique_ptr CreateGPUDataTransfer(void* stream); diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index d1057c6d24..e93f47fa0c 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -323,12 +323,12 @@ std::unique_ptr CreateGPUDataTransfer(void* stream) { #endif #ifdef USE_MIGRAPHX -std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name) { - return g_host->CreateHIPAllocator(device_id, name); +std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) { + return g_host->CreateROCMAllocator(device_id, name); } -std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) { - return g_host->CreateHIPPinnedAllocator(device_id, name); +std::unique_ptr CreateROCMPinnedAllocator(int16_t device_id, const char* name) { + return g_host->CreateROCMPinnedAllocator(device_id, name); } std::unique_ptr CreateGPUDataTransfer(void* stream) { diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 73615a5c19..d95b9f0e3e 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -151,12 +151,6 @@ struct ProviderHost { virtual bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) = 0; #endif -#ifdef USE_MIGRAPHX - virtual std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name) = 0; - virtual std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0; - virtual std::unique_ptr CreateGPUDataTransfer(void* stream) = 0; -#endif - #ifdef USE_ROCM virtual std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateROCMPinnedAllocator(int16_t device_id, const char* name) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 82fdc41126..fba81cb43e 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -91,8 +91,6 @@ namespace onnxruntime { ProviderInfo_CUDA* TryGetProviderInfo_CUDA(); ProviderInfo_CUDA& GetProviderInfo_CUDA(); -ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX(); -ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX(); ProviderInfo_ROCM* TryGetProviderInfo_ROCM(); ProviderInfo_ROCM& GetProviderInfo_ROCM(); ProviderHostCPU& GetProviderHostCPU(); @@ -188,12 +186,6 @@ struct ProviderHostImpl : ProviderHost { bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) override { return GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg); } #endif -#ifdef USE_MIGRAPHX - std::unique_ptr CreateHIPAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPAllocator(device_id, name); } - std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPPinnedAllocator(device_id, name); } - std::unique_ptr CreateGPUDataTransfer(void* stream) override { return GetProviderInfo_MIGRAPHX().CreateGPUDataTransfer(stream); } -#endif - #ifdef USE_ROCM std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMAllocator(device_id, name); } std::unique_ptr CreateROCMPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMPinnedAllocator(device_id, name); } @@ -1084,12 +1076,6 @@ std::unique_ptr CreateCUDAPinnedAllocator(int16_t device_id, const c return nullptr; } -std::unique_ptr CreateHIPPinnedAllocator(int16_t device_id, const char* name) { - if (auto* info = onnxruntime::TryGetProviderInfo_MIGRAPHX()) - return info->CreateHIPPinnedAllocator(device_id, name); - - return nullptr; -} std::unique_ptr CreateROCMPinnedAllocator(int16_t device_id, const char* name) { if (auto* info = onnxruntime::TryGetProviderInfo_ROCM()) return info->CreateROCMPinnedAllocator(device_id, name); @@ -1199,13 +1185,6 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA() { ORT_THROW("CUDA Provider not available, can't get interface for it"); } -ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX() { - if (auto* provider = s_library_migraphx.Get()) - return reinterpret_cast(provider->GetInfo()); - - return nullptr; -} - ProviderInfo_ROCM* TryGetProviderInfo_ROCM() { if (auto* provider = s_library_rocm.Get()) return reinterpret_cast(provider->GetInfo()); @@ -1213,13 +1192,6 @@ ProviderInfo_ROCM* TryGetProviderInfo_ROCM() { return nullptr; } -ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX() { - if (auto* info = TryGetProviderInfo_MIGRAPHX()) - return *info; - - ORT_THROW("MIGRAPHX Provider not available, can't get interface for it"); -} - ProviderInfo_ROCM& GetProviderInfo_ROCM() { if (auto* info = TryGetProviderInfo_ROCM()) return *info; diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index f753ccff31..f382312ecf 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -349,6 +349,8 @@ class InferenceSession(Session): # Tensorrt can fall back to CUDA. All others fall back to CPU. if 'TensorrtExecutionProvider' in available_providers: self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + elif 'MIGraphXExecutionProvider' in available_providers: + self._fallback_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider'] else: self._fallback_providers = ['CPUExecutionProvider'] diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 75a2c178ea..30840bc7a5 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -385,6 +385,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else if (provider_name == onnxruntime::kMIGraphXExecutionProvider) { #ifdef USE_MIGRAPHX Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0)); + + OrtROCMProviderOptions rocm_options; + rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo; + rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream; + // TODO: Support arena configuration for users of perf test + session_options.AppendExecutionProvider_ROCM(rocm_options); #else ORT_THROW("MIGraphX is not supported in this build\n"); #endif diff --git a/setup.py b/setup.py index 6130fcdbba..6735e83d89 100644 --- a/setup.py +++ b/setup.py @@ -216,7 +216,7 @@ try: if len(args) > 3: subprocess.run(args, check=True, stdout=subprocess.PIPE) - dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so' + dest = 'onnxruntime/capi/libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so') if path.isfile(dest): result = subprocess.run(['patchelf', '--print-needed', dest], check=True, stdout=subprocess.PIPE, universal_newlines=True) @@ -253,21 +253,21 @@ except ImportError as error: print(error) bdist_wheel = None -providers_cuda_or_rocm = 'libonnxruntime_providers_rocm.so' if is_rocm else 'libonnxruntime_providers_cuda.so' - +providers_cuda_or_rocm = 'libonnxruntime_providers_' + ('rocm.so' if is_rocm else 'cuda.so') +providers_tensorrt_or_migraphx = 'libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so') # Additional binaries if platform.system() == 'Linux': libs = ['onnxruntime_pybind11_state.so', 'libdnnl.so.2', 'libmklml_intel.so', 'libmklml_gnu.so', 'libiomp5.so', 'mimalloc.so'] dl_libs = ['libonnxruntime_providers_shared.so'] dl_libs.append(providers_cuda_or_rocm) - dl_libs.append('libonnxruntime_providers_tensorrt.so') + dl_libs.append(providers_tensorrt_or_migraphx) # DNNL, TensorRT & OpenVINO EPs are built as shared libs libs.extend(['libonnxruntime_providers_shared.so']) libs.extend(['libonnxruntime_providers_dnnl.so']) - libs.extend(['libonnxruntime_providers_tensorrt.so']) libs.extend(['libonnxruntime_providers_openvino.so']) libs.append(providers_cuda_or_rocm) + libs.append(providers_tensorrt_or_migraphx) # Nuphar Libs libs.extend(['libtvm.so.0.5.1']) if nightly_build: diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index c0d2ad04a3..3e2f5e2248 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2038,6 +2038,9 @@ def main(): if args.use_tensorrt: args.use_cuda = True + if args.use_migraphx: + args.use_rocm = True + if args.build_wheel or args.gen_doc: args.enable_pybind = True