Support MIGraphX EP to work with ROCm EP for inference on AMD GPU (#10368)

Co-authored-by: Weixing Zhang <wezhan@microsoft.com>

Support MIGraphXEP to work with ROCMEP for inference on AMD GPU
This commit is contained in:
Weixing Zhang 2022-01-26 15:52:56 -08:00 committed by GitHub
parent 389d2db1ce
commit ea9c8a7cdc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 29 additions and 81 deletions

View file

@ -715,6 +715,7 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
target_link_libraries(onnxruntime_test_all PRIVATE onnxruntime_language_interop onnxruntime_pyop)
endif()
if (onnxruntime_USE_ROCM)
target_compile_options(onnxruntime_test_all PRIVATE -D__HIP_PLATFORM_HCC__=1)
target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
endif()
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)

View file

@ -38,8 +38,6 @@ constexpr const char* CPU = "Cpu";
constexpr const char* CUDA = "Cuda";
constexpr const char* CUDA_PINNED = "CudaPinned";
constexpr const char* DML = "DML";
constexpr const char* MIGRAPHX = "MIGraphX";
constexpr const char* MIGRAPHX_PINNED = "MIGraphXPinned";
constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";

View file

@ -236,7 +236,8 @@ void TransformerMemcpyImpl::ProcessDefs(onnxruntime::Node& node, const KernelReg
else
provider_output_defs_.insert(arg);
}
} else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider) {
} else if (node_provider_type != kCudaExecutionProvider && node_provider_type != kTensorrtExecutionProvider &&
node_provider_type != kRocmExecutionProvider && node_provider_type != kMIGraphXExecutionProvider) {
// TODO: copy between devices? i.e. multiple GPUs
if (node_provider_type != onnxruntime::kCpuExecutionProvider &&
node_provider_type != onnxruntime::kVitisAIExecutionProvider &&

View file

@ -102,9 +102,9 @@ std::shared_ptr<KernelRegistry> MIGraphXExecutionProvider::GetKernelRegistry() c
}
MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true} {
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, true}, device_id_(info.device_id) {
// Set GPU device to be used
HIP_CALL_THROW(hipSetDevice(info.device_id));
HIP_CALL_THROW(hipSetDevice(device_id_));
t_ = migraphx::target(info.target_device.c_str());
// whether fp16 is enabled
@ -128,7 +128,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManag
allocator_ = allocator_manager->GetAllocator(device_id_, OrtMemTypeDefault);
if (nullptr == allocator_) {
AllocatorCreationInfo default_memory_info(
[](OrtDevice::DeviceId device_id) { return CreateHIPAllocator(device_id, onnxruntime::MIGRAPHX); }, device_id_);
[](OrtDevice::DeviceId device_id) { return CreateROCMAllocator(device_id, onnxruntime::CUDA); }, device_id_);
allocator_ = CreateAllocator(default_memory_info);
allocator_manager->InsertAllocator(allocator_);
}
@ -141,7 +141,7 @@ void MIGraphXExecutionProvider::RegisterAllocator(std::shared_ptr<AllocatorManag
if (nullptr == hip_pinned_alloc) {
AllocatorCreationInfo pinned_allocator_info(
[](OrtDevice::DeviceId device_id) {
return CreateHIPPinnedAllocator(device_id, onnxruntime::MIGRAPHX_PINNED);
return CreateROCMPinnedAllocator(device_id, onnxruntime::CUDA_PINNED);
},
DEFAULT_CPU_ALLOCATOR_DEVICE_ID);
hip_pinned_alloc = CreateAllocator(pinned_allocator_info);

View file

@ -32,34 +32,11 @@ std::unique_ptr<IExecutionProvider> MIGraphXProviderFactory::CreateProvider() {
return std::make_unique<MIGraphXExecutionProvider>(info_);
}
// std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id) {
// MIGraphXExecutionProviderInfo info;
// info.device_id = device_id;
// return std::make_shared<onnxruntime::MIGraphXProviderFactory>(info);
// }
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(const MIGraphXExecutionProviderInfo& info) {
return std::make_shared<onnxruntime::MIGraphXProviderFactory>(info);
}
struct ProviderInfo_MIGRAPHX_Impl : ProviderInfo_MIGRAPHX {
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) override {
return std::make_unique<HIPAllocator>(device_id, name);
}
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) override {
return std::make_unique<HIPPinnedAllocator>(device_id, name);
}
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) override {
return std::make_unique<GPUDataTransfer>(static_cast<hipStream_t>(stream));
}
} g_info;
struct MIGraphX_Provider : Provider {
void* GetInfo() override { return &g_info; }
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(int device_id) override {
MIGraphXExecutionProviderInfo info;
info.device_id = device_id;

View file

@ -10,11 +10,5 @@ struct IExecutionProviderFactory;
struct MIGraphXExecutionProviderInfo;
enum class ArenaExtendStrategy : int32_t;
struct MIGraphXExecutionProviderExternalAllocatorInfo;
struct ProviderInfo_MIGRAPHX {
virtual std::unique_ptr<onnxruntime::IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<onnxruntime::IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<onnxruntime::IDataTransfer> CreateGPUDataTransfer(void* stream) = 0;
};
}

View file

@ -243,8 +243,8 @@ std::unique_ptr<IAllocator> CreateCPUAllocator(const OrtMemoryInfo& memory_info)
std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name);
std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const char* name);
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name);
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name);
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name);
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name);
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream);

View file

@ -323,12 +323,12 @@ std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) {
#endif
#ifdef USE_MIGRAPHX
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) {
return g_host->CreateHIPAllocator(device_id, name);
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) {
return g_host->CreateROCMAllocator(device_id, name);
}
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) {
return g_host->CreateHIPPinnedAllocator(device_id, name);
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) {
return g_host->CreateROCMPinnedAllocator(device_id, name);
}
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) {

View file

@ -151,12 +151,6 @@ struct ProviderHost {
virtual bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) = 0;
#endif
#ifdef USE_MIGRAPHX
virtual std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) = 0;
#endif
#ifdef USE_ROCM
virtual std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) = 0;
virtual std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) = 0;

View file

@ -91,8 +91,6 @@ namespace onnxruntime {
ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
ProviderInfo_CUDA& GetProviderInfo_CUDA();
ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX();
ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX();
ProviderInfo_ROCM* TryGetProviderInfo_ROCM();
ProviderInfo_ROCM& GetProviderInfo_ROCM();
ProviderHostCPU& GetProviderHostCPU();
@ -188,12 +186,6 @@ struct ProviderHostImpl : ProviderHost {
bool CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg) override { return GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg); }
#endif
#ifdef USE_MIGRAPHX
std::unique_ptr<IAllocator> CreateHIPAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPAllocator(device_id, name); }
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGRAPHX().CreateHIPPinnedAllocator(device_id, name); }
std::unique_ptr<IDataTransfer> CreateGPUDataTransfer(void* stream) override { return GetProviderInfo_MIGRAPHX().CreateGPUDataTransfer(stream); }
#endif
#ifdef USE_ROCM
std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMAllocator(device_id, name); }
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_ROCM().CreateROCMPinnedAllocator(device_id, name); }
@ -1084,12 +1076,6 @@ std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const c
return nullptr;
}
std::unique_ptr<IAllocator> CreateHIPPinnedAllocator(int16_t device_id, const char* name) {
if (auto* info = onnxruntime::TryGetProviderInfo_MIGRAPHX())
return info->CreateHIPPinnedAllocator(device_id, name);
return nullptr;
}
std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(int16_t device_id, const char* name) {
if (auto* info = onnxruntime::TryGetProviderInfo_ROCM())
return info->CreateROCMPinnedAllocator(device_id, name);
@ -1199,13 +1185,6 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA() {
ORT_THROW("CUDA Provider not available, can't get interface for it");
}
ProviderInfo_MIGRAPHX* TryGetProviderInfo_MIGRAPHX() {
if (auto* provider = s_library_migraphx.Get())
return reinterpret_cast<ProviderInfo_MIGRAPHX*>(provider->GetInfo());
return nullptr;
}
ProviderInfo_ROCM* TryGetProviderInfo_ROCM() {
if (auto* provider = s_library_rocm.Get())
return reinterpret_cast<ProviderInfo_ROCM*>(provider->GetInfo());
@ -1213,13 +1192,6 @@ ProviderInfo_ROCM* TryGetProviderInfo_ROCM() {
return nullptr;
}
ProviderInfo_MIGRAPHX& GetProviderInfo_MIGRAPHX() {
if (auto* info = TryGetProviderInfo_MIGRAPHX())
return *info;
ORT_THROW("MIGRAPHX Provider not available, can't get interface for it");
}
ProviderInfo_ROCM& GetProviderInfo_ROCM() {
if (auto* info = TryGetProviderInfo_ROCM())
return *info;

View file

@ -349,6 +349,8 @@ class InferenceSession(Session):
# Tensorrt can fall back to CUDA. All others fall back to CPU.
if 'TensorrtExecutionProvider' in available_providers:
self._fallback_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
elif 'MIGraphXExecutionProvider' in available_providers:
self._fallback_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
else:
self._fallback_providers = ['CPUExecutionProvider']

View file

@ -385,6 +385,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else if (provider_name == onnxruntime::kMIGraphXExecutionProvider) {
#ifdef USE_MIGRAPHX
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0));
OrtROCMProviderOptions rocm_options;
rocm_options.miopen_conv_exhaustive_search = performance_test_config.run_config.cudnn_conv_algo;
rocm_options.do_copy_in_default_stream = !performance_test_config.run_config.do_cuda_copy_in_separate_stream;
// TODO: Support arena configuration for users of perf test
session_options.AppendExecutionProvider_ROCM(rocm_options);
#else
ORT_THROW("MIGraphX is not supported in this build\n");
#endif

View file

@ -216,7 +216,7 @@ try:
if len(args) > 3:
subprocess.run(args, check=True, stdout=subprocess.PIPE)
dest = 'onnxruntime/capi/libonnxruntime_providers_tensorrt.so'
dest = 'onnxruntime/capi/libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so')
if path.isfile(dest):
result = subprocess.run(['patchelf', '--print-needed', dest],
check=True, stdout=subprocess.PIPE, universal_newlines=True)
@ -253,21 +253,21 @@ except ImportError as error:
print(error)
bdist_wheel = None
providers_cuda_or_rocm = 'libonnxruntime_providers_rocm.so' if is_rocm else 'libonnxruntime_providers_cuda.so'
providers_cuda_or_rocm = 'libonnxruntime_providers_' + ('rocm.so' if is_rocm else 'cuda.so')
providers_tensorrt_or_migraphx = 'libonnxruntime_providers_' + ('migraphx.so' if is_rocm else 'tensorrt.so')
# Additional binaries
if platform.system() == 'Linux':
libs = ['onnxruntime_pybind11_state.so', 'libdnnl.so.2', 'libmklml_intel.so', 'libmklml_gnu.so', 'libiomp5.so',
'mimalloc.so']
dl_libs = ['libonnxruntime_providers_shared.so']
dl_libs.append(providers_cuda_or_rocm)
dl_libs.append('libonnxruntime_providers_tensorrt.so')
dl_libs.append(providers_tensorrt_or_migraphx)
# DNNL, TensorRT & OpenVINO EPs are built as shared libs
libs.extend(['libonnxruntime_providers_shared.so'])
libs.extend(['libonnxruntime_providers_dnnl.so'])
libs.extend(['libonnxruntime_providers_tensorrt.so'])
libs.extend(['libonnxruntime_providers_openvino.so'])
libs.append(providers_cuda_or_rocm)
libs.append(providers_tensorrt_or_migraphx)
# Nuphar Libs
libs.extend(['libtvm.so.0.5.1'])
if nightly_build:

View file

@ -2038,6 +2038,9 @@ def main():
if args.use_tensorrt:
args.use_cuda = True
if args.use_migraphx:
args.use_rocm = True
if args.build_wheel or args.gen_doc:
args.enable_pybind = True