mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
support MIGraphXEP to work with ROCMEP for inference on AMD GPU (#10368)
Co-authored-by: Weixing Zhang <wezhan@microsoft.com> Support MIGraphXEP to work with ROCMEP for inference on AMD GPU
This commit is contained in:
parent
389d2db1ce
commit
ea9c8a7cdc
14 changed files with 29 additions and 81 deletions
|
|
@ -715,6 +715,7 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
|
|||
target_link_libraries(onnxruntime_test_all PRIVATE onnxruntime_language_interop onnxruntime_pyop)
|
||||
endif()
|
||||
if (onnxruntime_USE_ROCM)
|
||||
target_compile_options(onnxruntime_test_all PRIVATE -D__HIP_PLATFORM_HCC__=1)
|
||||
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
|
||||
endif()
|
||||
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
|
||||
|
|
|
|||
|
|
@ -38,8 +38,6 @@ constexpr const char* CPU = "Cpu";
|
|||
constexpr const char* CUDA = "Cuda";
|
||||
constexpr const char* CUDA_PINNED = "CudaPinned";
|
||||
constexpr const char* DML = "DML";
|
||||
constexpr const char* MIGRAPHX = "MIGraphX";
|
||||
constexpr const char* MIGRAPHX_PINNED = "MIGraphXPinned";
|
||||
constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
|
||||
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
|
||||
|
||||
|
|
|
|||
|
|
@ -236,7 +236,8 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg
|
|||
else
|
||||
provider_output_defs_.insert(arg);
|
||||
}
|
||||
} else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider) {
|
||||
} else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider &&
|
||||
node_provider_type != kRocmExecutionProvider && node_provider_type != kMIGraphXExecutionProvider) {
|
||||
// TODO: copy between devices? i.e. multiple GPUs
|
||||
if (node_provider_type != onnxruntime::kCpuExecutionProvider &&
|
||||
node_provider_type != onnxruntime::kVitisAIExecutionProvider &&
|
||||
|
|
|
|||
|
|
@ -102,9 +102,9 @@ std::shared_ptr<KernelRegistry> MIGraphXExecutionProvider::GetKernelRegistry() c
|
|||
}
|
||||
|
||||
MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProviderInfo& info)
|
||||
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true} {
|
||||
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true}, device_id_(info.device_id) {
|
||||
// Set GPU device to be used
|
||||
HIP_CALL_THROW(hipSetDevice(info.device_id));
|
||||
HIP_CALL_THROW(hipSetDevice(device_id_));
|
||||
t_ = migraphx::target(info.target_device.c_str());
|
||||
|
||||
// whether fp16 is enable
|
||||
|
|
@ -128,7 +128,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManag
|
|||
allocator_ = allocator_manager->GetAllocator(device_id_, OrtMemTypeDefault);
|
||||
if (nullptr == allocator_) {
|
||||
AllocatorCreationInfo default_memory_info(
|
||||
[](OrtDevice::DeviceId device_id) { return CreateHIPAllocator(device_id, onnxruntime::MIGRAPHX); }, device_id_);
|
||||
[](OrtDevice::DeviceId device_id) { return CreateROCMAllocator(device_id, onnxruntime::CUDA); }, device_id_);
|
||||
allocator_ = CreateAllocator(default_memory_info);
|
||||
allocator_manager->InsertAllocator(allocator_);
|
||||
}
|
||||
|
|
@ -141,7 +141,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManag
|
|||
if (nullptr == hip_pinned_alloc) {
|
||||
AllocatorCreationInfo pinned_allocator_info(
|
||||
[](OrtDevice::DeviceId device_id) {
|
||||
return CreateHIPPinnedAllocator(device_id, onnxruntime::MIGRAPHX_PINNED);
|
||||
return CreateROCMPinnedAllocator(device_id, onnxruntime::CUDA_PINNED);
|
||||
},
|
||||
DEFAULT_CPU_ALLOCATOR_DEVICE_ID);
|
||||
hip_pinned_alloc = CreateAllocator(pinned_allocator_info);
|
||||
|
|
|
|||
|
|
@ -32,34 +32,11 @@ std::unique_ptr<IExecutionProvider> MIGraphXProviderFactory::CreateProvider() {
|
|||
return std::make_unique<MIGraphXExecutionProvider>(info_);
|
||||
}
|
||||
|
||||
// std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id) {
|
||||
// MIGraphXExecutionProviderInfo info;
|
||||
// info.device_id = device_id;
|
||||
// return std::make_shared<onnxruntime::MIGraphXProviderFactory>(info);
|
||||
// }
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(const MIGraphXExecutionProviderInfo& info) {
|
||||
return std::make_shared<onnxruntime::MIGraphXProviderFactory>(info);
|
||||
}
|
||||
|
||||
|
||||
struct ProviderInfo_MIGRAPHX_Impl : ProviderInfo_MIGRAPHX {
|
||||
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) override {
|
||||
return std::make_unique<HIPAllocator>(device_id, name);
|
||||
}
|
||||
|
||||
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) override {
|
||||
return std::make_unique<HIPPinnedAllocator>(device_id, name);
|
||||
}
|
||||
|
||||
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) override {
|
||||
return std::make_unique<GPUDataTransfer>(static_cast<hipStream_t>(stream));
|
||||
}
|
||||
} g_info;
|
||||
|
||||
struct MIGraphX_Provider : Provider {
|
||||
void* GetInfo() override { return &g_info; }
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(int device_id) override {
|
||||
MIGraphXExecutionProviderInfo info;
|
||||
info.device_id = device_id;
|
||||
|
|
|
|||
|
|
@ -10,11 +10,5 @@ struct IExecutionProviderFactory;
|
|||
struct MIGraphXExecutionProviderInfo;
|
||||
enum class ArenaExtendStrategy : int32_t;
|
||||
struct MIGraphXExecutionProviderExternalAllocatorInfo;
|
||||
|
||||
struct ProviderInfo_MIGRAPHX {
|
||||
virtual std::unique_ptr<onnxruntime::IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) = 0;
|
||||
virtual std::unique_ptr<onnxruntime::IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0;
|
||||
virtual std::unique_ptr<onnxruntime::IDataTransfer> CreateGPUDataTransfer(void* stream) = 0;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -243,8 +243,8 @@ std::unique_ptr<IAllocator> CreateCPUAllocator(const OrtMemoryInfo& memory_info)
|
|||
std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name);
|
||||
std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const char* name);
|
||||
|
||||
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name);
|
||||
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name);
|
||||
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name);
|
||||
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name);
|
||||
|
||||
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream);
|
||||
|
||||
|
|
|
|||
|
|
@ -323,12 +323,12 @@ std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) {
|
|||
#endif
|
||||
|
||||
#ifdef USE_MIGRAPHX
|
||||
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) {
|
||||
return g_host->CreateHIPAllocator(device_id, name);
|
||||
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) {
|
||||
return g_host->CreateROCMAllocator(device_id, name);
|
||||
}
|
||||
|
||||
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) {
|
||||
return g_host->CreateHIPPinnedAllocator(device_id, name);
|
||||
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) {
|
||||
return g_host->CreateROCMPinnedAllocator(device_id, name);
|
||||
}
|
||||
|
||||
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) {
|
||||
|
|
|
|||
|
|
@ -151,12 +151,6 @@ struct ProviderHost {
|
|||
virtual bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) = 0;
|
||||
#endif
|
||||
|
||||
#ifdef USE_MIGRAPHX
|
||||
virtual std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) = 0;
|
||||
virtual std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0;
|
||||
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) = 0;
|
||||
#endif
|
||||
|
||||
#ifdef USE_ROCM
|
||||
virtual std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) = 0;
|
||||
virtual std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) = 0;
|
||||
|
|
|
|||
|
|
@ -91,8 +91,6 @@ namespace onnxruntime {
|
|||
|
||||
ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
|
||||
ProviderInfo_CUDA& GetProviderInfo_CUDA();
|
||||
ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX();
|
||||
ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX();
|
||||
ProviderInfo_ROCM* TryGetProviderInfo_ROCM();
|
||||
ProviderInfo_ROCM& GetProviderInfo_ROCM();
|
||||
ProviderHostCPU& GetProviderHostCPU();
|
||||
|
|
@ -188,12 +186,6 @@ struct ProviderHostImpl : ProviderHost {
|
|||
bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) override { return GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg); }
|
||||
#endif
|
||||
|
||||
#ifdef USE_MIGRAPHX
|
||||
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPAllocator(device_id, name); }
|
||||
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPPinnedAllocator(device_id, name); }
|
||||
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) override { return GetProviderInfo_MIGRAPHX().CreateGPUDataTransfer(stream); }
|
||||
#endif
|
||||
|
||||
#ifdef USE_ROCM
|
||||
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMAllocator(device_id, name); }
|
||||
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMPinnedAllocator(device_id, name); }
|
||||
|
|
@ -1084,12 +1076,6 @@ std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const c
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) {
|
||||
if (auto* info = onnxruntime::TryGetProviderInfo_MIGRAPHX())
|
||||
return info->CreateHIPPinnedAllocator(device_id, name);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) {
|
||||
if (auto* info = onnxruntime::TryGetProviderInfo_ROCM())
|
||||
return info->CreateROCMPinnedAllocator(device_id, name);
|
||||
|
|
@ -1199,13 +1185,6 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA() {
|
|||
ORT_THROW("CUDA Provider not available, can't get interface for it");
|
||||
}
|
||||
|
||||
ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX() {
|
||||
if (auto* provider = s_library_migraphx.Get())
|
||||
return reinterpret_cast<ProviderInfo_MIGRAPHX*>(provider->GetInfo());
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ProviderInfo_ROCM* TryGetProviderInfo_ROCM() {
|
||||
if (auto* provider = s_library_rocm.Get())
|
||||
return reinterpret_cast<ProviderInfo_ROCM*>(provider->GetInfo());
|
||||
|
|
@ -1213,13 +1192,6 @@ ProviderInfo_ROCM* TryGetProviderInfo_ROCM() {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX() {
|
||||
if (auto* info = TryGetProviderInfo_MIGRAPHX())
|
||||
return *info;
|
||||
|
||||
ORT_THROW("MIGRAPHX Provider not available, can't get interface for it");
|
||||
}
|
||||
|
||||
ProviderInfo_ROCM& GetProviderInfo_ROCM() {
|
||||
if (auto* info = TryGetProviderInfo_ROCM())
|
||||
return *info;
|
||||
|
|
|
|||
|
|
@ -349,6 +349,8 @@ class InferenceSession(Session):
|
|||
# Tensorrt can fall back to CUDA. All others fall back to CPU.
|
||||
if 'TensorrtExecutionProvider' in available_providers:
|
||||
self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
elif 'MIGraphXExecutionProvider' in available_providers:
|
||||
self._fallback_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
|
||||
else:
|
||||
self._fallback_providers = ['CPUExecutionProvider']
|
||||
|
||||
|
|
|
|||
|
|
@ -385,6 +385,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
} else if (provider_name == onnxruntime::kMIGraphXExecutionProvider) {
|
||||
#ifdef USE_MIGRAPHX
|
||||
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0));
|
||||
|
||||
OrtROCMProviderOptions rocm_options;
|
||||
rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo;
|
||||
rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
|
||||
// TODO: Support arena configuration for users of perf test
|
||||
session_options.AppendExecutionProvider_ROCM(rocm_options);
|
||||
#else
|
||||
ORT_THROW("MIGraphX is not supported in this build\n");
|
||||
#endif
|
||||
|
|
|
|||
10
setup.py
10
setup.py
|
|
@ -216,7 +216,7 @@ try:
|
|||
if len(args) > 3:
|
||||
subprocess.run(args, check=True, stdout=subprocess.PIPE)
|
||||
|
||||
dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
|
||||
dest = 'onnxruntime/capi/libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so')
|
||||
if path.isfile(dest):
|
||||
result = subprocess.run(['patchelf', '--print-needed', dest],
|
||||
check=True, stdout=subprocess.PIPE, universal_newlines=True)
|
||||
|
|
@ -253,21 +253,21 @@ except ImportError as error:
|
|||
print(error)
|
||||
bdist_wheel = None
|
||||
|
||||
providers_cuda_or_rocm = 'libonnxruntime_providers_rocm.so' if is_rocm else 'libonnxruntime_providers_cuda.so'
|
||||
|
||||
providers_cuda_or_rocm = 'libonnxruntime_providers_' + ('rocm.so' if is_rocm else 'cuda.so')
|
||||
providers_tensorrt_or_migraphx = 'libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so')
|
||||
# Additional binaries
|
||||
if platform.system() == 'Linux':
|
||||
libs = ['onnxruntime_pybind11_state.so', 'libdnnl.so.2', 'libmklml_intel.so', 'libmklml_gnu.so', 'libiomp5.so',
|
||||
'mimalloc.so']
|
||||
dl_libs = ['libonnxruntime_providers_shared.so']
|
||||
dl_libs.append(providers_cuda_or_rocm)
|
||||
dl_libs.append('libonnxruntime_providers_tensorrt.so')
|
||||
dl_libs.append(providers_tensorrt_or_migraphx)
|
||||
# DNNL, TensorRT & OpenVINO EPs are built as shared libs
|
||||
libs.extend(['libonnxruntime_providers_shared.so'])
|
||||
libs.extend(['libonnxruntime_providers_dnnl.so'])
|
||||
libs.extend(['libonnxruntime_providers_tensorrt.so'])
|
||||
libs.extend(['libonnxruntime_providers_openvino.so'])
|
||||
libs.append(providers_cuda_or_rocm)
|
||||
libs.append(providers_tensorrt_or_migraphx)
|
||||
# Nuphar Libs
|
||||
libs.extend(['libtvm.so.0.5.1'])
|
||||
if nightly_build:
|
||||
|
|
|
|||
|
|
@ -2038,6 +2038,9 @@ def main():
|
|||
if args.use_tensorrt:
|
||||
args.use_cuda = True
|
||||
|
||||
if args.use_migraphx:
|
||||
args.use_rocm = True
|
||||
|
||||
if args.build_wheel or args.gen_doc:
|
||||
args.enable_pybind = True
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue