Add amd migraphx execution provider to onnx runtime (#2929)

* Add amd migraphx execution provider to onnx runtime

* rename MiGraphX to MIGraphX

* remove unnecessary changes in migraphx_execution_provider.cc

* add migraphx EP to tests

* add input requirements for the BatchNorm operator

* add support for the ONNX operator PRelu

* update migraphx dockerfile and remove one unused line

* sync submodules with master branch

* fixed a small bug

* fix various bugs so that MSFT real models run correctly

* some code cleanup

* fix python file format

* fixed a code style issue

* add default provider for migraphx execution provider

Co-authored-by: Shucai Xiao <Shucai.Xiao@amd.com>
Paul Fultz II 2020-05-26 15:24:59 -05:00 committed by GitHub
parent 9d0534c0eb
commit 7759136610
36 changed files with 1957 additions and 18 deletions


@@ -84,6 +84,7 @@ option(tensorflow_C_PACKAGE_PATH "Path to tensorflow C package installation dir"
option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF)
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump node input shapes and output data to standard output when executing the model." OFF)
option(onnxruntime_USE_DML "Build with DirectML support" OFF)
option(onnxruntime_USE_MIGRAPHX "Build with AMDMIGraphX support" OFF)
option(onnxruntime_USE_WINML "Build with WinML support" OFF)
option(onnxruntime_USE_ACL "Build with ACL support" OFF)
option(onnxruntime_USE_ACL_1902 "Build with ACL version 1902 support" OFF)
@@ -854,6 +855,14 @@ if (onnxruntime_USE_TENSORRT)
endif()
endif()
if (onnxruntime_USE_MIGRAPHX)
if (WIN32)
message(FATAL_ERROR "MIGraphX does not support building on Windows!")
endif()
set(AMD_MIGRAPHX_HOME ${onnxruntime_MIGRAPHX_HOME})
add_definitions(-DUSE_MIGRAPHX=1)
endif()
if (onnxruntime_USE_TVM)
if (WIN32 AND MSVC)
# wd4100: identifier' : unreferenced formal parameter


@@ -82,6 +82,7 @@ target_link_libraries(onnxruntime PRIVATE
${PROVIDERS_NNAPI}
${PROVIDERS_RKNPU}
${PROVIDERS_TENSORRT}
${PROVIDERS_MIGRAPHX}
${PROVIDERS_OPENVINO}
${PROVIDERS_NUPHAR}
${PROVIDERS_VITISAI}


@@ -22,6 +22,10 @@ if (onnxruntime_USE_TENSORRT)
STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_TENSORRT,")
endif()
if (onnxruntime_USE_MIGRAPHX)
STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_MIGRAPHX,")
endif()
if (onnxruntime_USE_OPENVINO)
STRING(APPEND CSHARP_PREPROCESSOR_DEFINES "USE_OPENVINO,")
endif()


@@ -67,6 +67,10 @@ if(onnxruntime_USE_DML)
set(PROVIDERS_DML onnxruntime_providers_dml)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES dml)
endif()
if(onnxruntime_USE_MIGRAPHX)
set(PROVIDERS_MIGRAPHX onnxruntime_providers_migraphx)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES migraphx)
endif()
if(onnxruntime_USE_OPENVINO)
set(PROVIDERS_OPENVINO onnxruntime_providers_openvino)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino)
@@ -607,6 +611,32 @@ if (onnxruntime_USE_DML)
set_target_properties(onnxruntime_providers_dml PROPERTIES FOLDER "ONNXRuntime")
endif()
if (onnxruntime_USE_MIGRAPHX)
# Add search paths for default rocm installation
list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hcc /opt/rocm/hip /opt/rocm)
find_package(hip)
find_package(migraphx PATHS ${AMD_MIGRAPHX_HOME})
set(migraphx_libs migraphx::c hip::host)
file(GLOB_RECURSE onnxruntime_providers_migraphx_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/migraphx/*.cc"
)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_migraphx_cc_srcs})
add_library(onnxruntime_providers_migraphx ${onnxruntime_providers_migraphx_cc_srcs})
target_link_libraries(onnxruntime_providers_migraphx PRIVATE ${migraphx_libs})
set_target_properties(onnxruntime_providers_migraphx PROPERTIES FOLDER "ONNXRuntime")
target_compile_options(onnxruntime_providers_migraphx PRIVATE -Wno-error=sign-compare)
target_include_directories(onnxruntime_providers_migraphx PRIVATE ${ONNXRUNTIME_ROOT})
onnxruntime_add_include_to_target(onnxruntime_providers_migraphx onnxruntime_common onnxruntime_framework onnx)
add_dependencies(onnxruntime_providers_migraphx ${onnxruntime_EXTERNAL_DEPENDENCIES})
install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/migraphx DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
set_target_properties(onnxruntime_providers_migraphx PROPERTIES LINKER_LANGUAGE CXX)
endif()
if (onnxruntime_USE_ACL)
add_definitions(-DUSE_ACL=1)
file(GLOB_RECURSE onnxruntime_providers_acl_cc_srcs


@@ -85,6 +85,7 @@ set(onnxruntime_pybind11_state_libs
${PROVIDERS_CUDA}
${PROVIDERS_DNNL}
${PROVIDERS_TENSORRT}
${PROVIDERS_MIGRAPHX}
${PROVIDERS_NGRAPH}
${PROVIDERS_OPENVINO}
${PROVIDERS_NUPHAR}


@@ -314,6 +314,11 @@ if(onnxruntime_USE_DML)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_dml)
endif()
if(onnxruntime_USE_MIGRAPHX)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_migraphx)
endif()
file(GLOB_RECURSE onnxruntime_test_tvm_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/tvm/*.h"
"${ONNXRUNTIME_ROOT}/test/tvm/*.cc"
@@ -341,6 +346,7 @@ set(ONNXRUNTIME_TEST_LIBS
${PROVIDERS_CUDA}
${PROVIDERS_DNNL}
${PROVIDERS_TENSORRT}
${PROVIDERS_MIGRAPHX}
${PROVIDERS_NGRAPH}
${PROVIDERS_OPENVINO}
${PROVIDERS_NUPHAR}


@@ -444,6 +444,9 @@ namespace Microsoft.ML.OnnxRuntime
[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Tensorrt(IntPtr /*(OrtSessionOptions*)*/ options, int device_id);
[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_MIGraphX(IntPtr /*(OrtSessionOptions*)*/ options, int device_id);
[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSessionOptionsAppendExecutionProvider_Nnapi(IntPtr /*(OrtSessionOptions*)*/ options);


@@ -135,6 +135,14 @@ namespace Microsoft.ML.OnnxRuntime
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_Tensorrt(_nativePtr, deviceId));
}
/// <summary>
/// Use only if you have the onnxruntime package specific to this Execution Provider.
/// </summary>
public void AppendExecutionProvider_MIGraphX(int deviceId)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsAppendExecutionProvider_MIGraphX(_nativePtr, deviceId));
}
/// <summary>
/// Use only if you have the onnxruntime package specific to this Execution Provider.
/// </summary>


@@ -97,6 +97,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
#if USE_TENSORRT
opt.AppendExecutionProvider_Tensorrt(0);
#endif
#if USE_MIGRAPHX
opt.AppendExecutionProvider_MIGraphX(0);
#endif
#if USE_NNAPI
opt.AppendExecutionProvider_Nnapi();
#endif
@@ -1614,6 +1617,9 @@ namespace Microsoft.ML.OnnxRuntime.Tests
#if USE_TENSORRT
,"OrtSessionOptionsAppendExecutionProvider_Tensorrt"
#endif
#if USE_MIGRAPHX
,"OrtSessionOptionsAppendExecutionProvider_MIGraphX"
#endif
#if USE_NNAPI
,"OrtSessionOptionsAppendExecutionProvider_Nnapi"
#endif


@@ -0,0 +1,49 @@
# --------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Dockerfile to run ONNXRuntime with MIGraphX integration
#--------------------------------------------------------------------------
FROM ubuntu:16.04
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
ARG ONNXRUNTIME_BRANCH=master
ENV DEBIAN_FRONTEND noninteractive
ENV LC_ALL C.UTF-8
ENV LANG C.UTF-8
ENV MIGRAPHX_DISABLE_FAST_GELU=1
# Install rocm
RUN apt-get update && apt-get install -y --no-install-recommends curl && \
curl -sL http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add - && \
sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main > /etc/apt/sources.list.d/rocm.list'
RUN apt-get update &&\
apt-get install -y sudo git bash build-essential cmake libpython3.5-dev python3-pip miopen-hip rocblas half
# Install rbuild
RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
# Install MIGraphX from source
RUN mkdir -p /migraphx
RUN cd /migraphx && git clone --depth=1 --branch migraphx_for_ort https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src
RUN cd /migraphx && rbuild package --cxx /opt/rocm/bin/hcc -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3
RUN dpkg -i /migraphx/build/*.deb
RUN rm -rf /migraphx
WORKDIR /code
ENV PATH /opt/miniconda/bin:/code/cmake-3.14.3-Linux-x86_64/bin:${PATH}
# Workaround for broken cmake in hip's binary package
RUN sed -i -e 's/hcc::hccrt;hcc::hc_am//g' /opt/rocm/hip/lib/cmake/hip/hip-targets-release.cmake
ENV CXXFLAGS "-D__HIP_PLATFORM_HCC__=1"
# Prepare onnxruntime repository & build onnxruntime
RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXRUNTIME_REPO} onnxruntime &&\
/bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh &&\
cd onnxruntime &&\
/bin/sh ./build.sh --config Release --build_wheel --update --build --parallel --cmake_extra_defines ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER) --use_migraphx &&\
pip install /code/onnxruntime/build/Linux/Release/dist/*.whl &&\
cd .. &&\
rm -rf onnxruntime cmake-3.14.3-Linux-x86_64


@@ -23,13 +23,14 @@ struct OrtDevice {
// Pre-defined device types.
static const DeviceType CPU = 0;
static const DeviceType GPU = 1; //CUDA
static const DeviceType GPU = 1; //CUDA or HIP
static const DeviceType FPGA = 2;
struct MemType {
// Pre-defined memory types.
static const MemoryType DEFAULT = 0;
static const MemoryType CUDA_PINNED = 1;
static const MemoryType HIP_PINNED = 2;
};
constexpr OrtDevice(DeviceType device_type_, MemoryType memory_type_, DeviceId device_id_)
@@ -141,6 +142,8 @@ namespace onnxruntime {
constexpr const char* CPU = "Cpu";
constexpr const char* CUDA = "Cuda";
constexpr const char* CUDA_PINNED = "CudaPinned";
constexpr const char* MIGRAPHX = "MIGraphX";
constexpr const char* MIGRAPHX_PINNED = "MIGraphXPinned";
constexpr const char* TRT = "Tensorrt";
constexpr const char* TRT_PINNED = "TensorrtPinned";
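
For orientation, a minimal sketch of how the new HIP_PINNED memory type combines with the OrtDevice constructor shown above (the names are exactly those in this diff; device 0 is an assumption):

// Device memory on GPU 0, later served by HIPAllocator via hipMalloc:
OrtDevice gpu_device(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0);
// Page-locked host memory for asynchronous HIP copies, served by HIPPinnedAllocator:
OrtDevice pinned_host(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, 0);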


@@ -22,6 +22,7 @@ constexpr const char* kMSNchwcDomain = "com.microsoft.nchwc";
constexpr const char* kMSFeaturizersDomain = "com.microsoft.mlfeaturizers";
constexpr const char* kMSDmlDomain = "com.microsoft.dml";
constexpr const char* kNGraphDomain = "com.intel.ai";
constexpr const char* kMIGraphXDomain = "";
constexpr const char* kVitisAIDomain = "com.xilinx";
constexpr const char* kCpuExecutionProvider = "CPUExecutionProvider";
constexpr const char* kCudaExecutionProvider = "CUDAExecutionProvider";
@@ -34,5 +35,6 @@ constexpr const char* kTensorrtExecutionProvider = "TensorrtExecutionProvider";
constexpr const char* kNnapiExecutionProvider = "NnapiExecutionProvider";
constexpr const char* kRknpuExecutionProvider = "RknpuExecutionProvider";
constexpr const char* kDmlExecutionProvider = "DmlExecutionProvider";
constexpr const char* kMIGraphXExecutionProvider = "MIGraphXExecutionProvider";
constexpr const char* kAclExecutionProvider = "ACLExecutionProvider";
} // namespace onnxruntime


@@ -0,0 +1,15 @@
// Copyright 2019 AMD AMDMIGraphX
#include "onnxruntime_c_api.h"
#ifdef __cplusplus
extern "C" {
#endif
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessionOptions* options, int device_id);
#ifdef __cplusplus
}
#endif
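
For context, a minimal C-API sketch of how an application would use this entry point. This is a non-authoritative example: it assumes a build configured with --use_migraphx, a model file named "model.onnx", and it elides the OrtStatus error checks a real caller would do.

#include "onnxruntime_c_api.h"

const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
OrtEnv* env;
g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "migraphx_demo", &env);
OrtSessionOptions* session_options;
g_ort->CreateSessionOptions(&session_options);
// Hand supported subgraphs to MIGraphX on device 0; unsupported nodes fall back to CPU.
OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0);
OrtSession* session;
g_ort->CreateSession(env, "model.onnx", session_options, &session);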


@@ -22,6 +22,7 @@
#include "onnxruntime/core/providers/nuphar/nuphar_provider_factory.h"
#include "onnxruntime/core/providers/openvino/openvino_provider_factory.h"
#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h"
#include "onnxruntime/core/providers/migraphx/migraphx_provider_factory.h"
#include "onnxruntime/core/providers/acl/acl_provider_factory.h"
#ifdef USE_DIRECTML
#include "onnxruntime/core/providers/dml/dml_provider_factory.h"
@@ -423,6 +424,22 @@ JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_addNup
#endif
}
/*
* Class: ai_onnxruntime_OrtSession_SessionOptions
* Method: addMIGraphX
* Signature: (JJI)V
*/
JNIEXPORT void JNICALL Java_ai_onnxruntime_OrtSession_00024SessionOptions_addMIGraphX
(JNIEnv * jniEnv, jobject jobj, jlong apiHandle, jlong handle, jint deviceNum) {
(void)jobj;
#ifdef USE_MIGRAPHX
checkOrtStatus(jniEnv,(const OrtApi*)apiHandle,OrtSessionOptionsAppendExecutionProvider_MIGraphX((OrtSessionOptions*) handle, deviceNum));
#else
(void)apiHandle;(void)handle;(void)deviceNum; // Parameters used when MIGraphX is defined.
throwOrtException(jniEnv,convertErrorCode(ORT_INVALID_ARGUMENT),"This binary was not compiled with MIGraphX support.");
#endif
}
/*
* Class: ai_onnxruntime_OrtSession_SessionOptions
* Method: addDirectML


@@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "migraphx_inc.h"
#include "gpu_data_transfer.h"
namespace onnxruntime {
GPUDataTransfer::GPUDataTransfer() {
// create streams, default is nullptr
streams_[kHipStreamDefault] = nullptr;
hipStreamCreateWithFlags(&streams_[kHipStreamCopyIn], hipStreamNonBlocking);
hipStreamCreateWithFlags(&streams_[kHipStreamCopyOut], hipStreamNonBlocking);
}
GPUDataTransfer::~GPUDataTransfer() {
hipStreamDestroy(streams_[kHipStreamCopyIn]);
hipStreamDestroy(streams_[kHipStreamCopyOut]);
}
bool GPUDataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const {
return src_device.Type() == OrtDevice::GPU || src_device.MemType() == OrtDevice::MemType::HIP_PINNED
|| dst_device.Type() == OrtDevice::GPU || dst_device.MemType() == OrtDevice::MemType::HIP_PINNED;
}
common::Status GPUDataTransfer::CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const {
size_t bytes = src.SizeInBytes();
const void* src_data = src.DataRaw();
void* dst_data = dst.MutableDataRaw();
auto& src_device = src.Location().device;
auto& dst_device = dst.Location().device;
if (dst_device.Type() == OrtDevice::GPU) {
if (src_device.Type() == OrtDevice::CPU && src_device.MemType() == OrtDevice::MemType::HIP_PINNED) {
// copy from pinned memory to GPU, this is non-blocking
hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyHostToDevice, streams_[exec_queue_id]);
} else if (src_device.Type() == OrtDevice::GPU) {
// copying between GPU, this is non-blocking
hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToDevice, streams_[kHipStreamDefault]);
} else {
// copy from other CPU memory to GPU, this is blocking
hipMemcpy(dst_data, src_data, bytes, hipMemcpyHostToDevice);
}
} else if (src_device.Type() == OrtDevice::GPU) {
if (dst_device.Type() == OrtDevice::CPU && dst_device.MemType() == OrtDevice::MemType::HIP_PINNED) {
// copying from GPU to pinned memory, this is non-blocking
hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, streams_[exec_queue_id]);
} else {
// copying from GPU to CPU memory, this is blocking
hipMemcpy(dst_data, src_data, bytes, hipMemcpyDeviceToHost);
}
} else {
// copying between cpu memory
memcpy(dst_data, src_data, bytes);
}
return Status::OK();
}
} // namespace onnxruntime
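
A small sketch of the copy-routing rule implemented above (a hypothetical caller; the CPU-side OrtDevice default-constructs to CPU with MemType::DEFAULT, as in GetGPUDataTransfer earlier in this PR):

onnxruntime::GPUDataTransfer transfer;
OrtDevice gpu(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0);
OrtDevice cpu;  // defaults to CPU / MemType::DEFAULT
// true: one endpoint is the GPU, so this object handles the copy.
bool handled = transfer.CanCopy(gpu, cpu);
// A GPU->CPU copy between these two devices takes the blocking hipMemcpy branch;
// only HIP_PINNED host memory gets the non-blocking hipMemcpyAsync path.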


@@ -0,0 +1,36 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "migraphx_inc.h"
#include "core/framework/data_transfer.h"
namespace onnxruntime {
enum HIPStreamType : int {
kHipStreamDefault = 0,
kHipStreamCopyIn,
kHipStreamCopyOut,
kTotalHipStreams,
};
class GPUDataTransfer : public IDataTransfer {
public:
GPUDataTransfer();
~GPUDataTransfer();
bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;
common::Status CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const override;
hipStream_t GetStream(int queue_id) const {
ORT_ENFORCE(queue_id >= 0 && queue_id < kTotalHipStreams);
return streams_[queue_id];
}
private:
hipStream_t streams_[kTotalHipStreams];
};
} // namespace onnxruntime


@@ -0,0 +1,71 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "migraphx_inc.h"
#include "hip_allocator.h"
#include "core/framework/allocatormgr.h"
#include "core/framework/session_state.h"
#include "hip_fence.h"
#include "gpu_data_transfer.h"
namespace onnxruntime {
static const GPUDataTransfer* GetGPUDataTransfer(const SessionState* session_state) {
OrtDevice gpu_device(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0);
OrtDevice cpu_device;
return dynamic_cast<const GPUDataTransfer*>(session_state->GetDataTransferMgr().GetDataTransfer(gpu_device, cpu_device));
}
void HIPAllocator::CheckDevice() const {
#ifndef NDEBUG
// check device to match at debug build
// if it's expected to change, call hipSetDevice instead of the check
int current_device;
hipGetDevice(&current_device);
ORT_ENFORCE(current_device == info_.id);
#endif
}
void* HIPAllocator::Alloc(size_t size) {
CheckDevice();
void* p = nullptr;
if (size > 0) {
hipMalloc((void**)&p, size);
}
return p;
}
void HIPAllocator::Free(void* p) {
CheckDevice();
hipFree(p); // do not throw error since it's OK for hipFree to fail during shutdown
}
const OrtMemoryInfo& HIPAllocator::Info() const {
return info_;
}
FencePtr HIPAllocator::CreateFence(const SessionState* session_state) {
return std::make_shared<HIPFence>(GetGPUDataTransfer(session_state));
}
void* HIPPinnedAllocator::Alloc(size_t size) {
void* p = nullptr;
if (size > 0) {
hipHostMalloc((void**)&p, size);
}
return p;
}
void HIPPinnedAllocator::Free(void* p) {
hipHostFree(p);
}
const OrtMemoryInfo& HIPPinnedAllocator::Info() const {
return info_;
}
FencePtr HIPPinnedAllocator::CreateFence(const SessionState* session_state) {
return std::make_shared<HIPFence>(GetGPUDataTransfer(session_state));
}
} // namespace onnxruntime


@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/framework/allocator.h"
namespace onnxruntime {
class HIPAllocator : public IDeviceAllocator {
public:
HIPAllocator(int device_id, const char* name) : info_(name, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, device_id), device_id, OrtMemTypeDefault) {}
virtual void* Alloc(size_t size) override;
virtual void Free(void* p) override;
virtual const OrtMemoryInfo& Info() const override;
virtual FencePtr CreateFence(const SessionState* session_state) override;
private:
void CheckDevice() const;
private:
const OrtMemoryInfo info_;
};
//TODO: add a default constructor
class HIPPinnedAllocator : public IDeviceAllocator {
public:
HIPPinnedAllocator(int device_id, const char* name) : info_(name, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::HIP_PINNED, device_id), device_id, OrtMemTypeCPUOutput) {}
virtual void* Alloc(size_t size) override;
virtual void Free(void* p) override;
virtual const OrtMemoryInfo& Info() const override;
virtual FencePtr CreateFence(const SessionState* session_state) override;
private:
const OrtMemoryInfo info_;
};
} // namespace onnxruntime
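
A usage sketch for the two allocators (assuming device 0 exists; the allocator names reuse the MIGraphX/MIGraphXPinned strings defined earlier in this PR):

onnxruntime::HIPAllocator gpu_alloc(0, "MIGraphX");
void* dev_buf = gpu_alloc.Alloc(1 << 20);        // 1 MiB of device memory via hipMalloc
onnxruntime::HIPPinnedAllocator pinned_alloc(0, "MIGraphXPinned");
void* host_buf = pinned_alloc.Alloc(1 << 20);    // page-locked host memory via hipHostMalloc
// Pinned host buffers are what let GPUDataTransfer use its non-blocking hipMemcpyAsync paths.
pinned_alloc.Free(host_buf);
gpu_alloc.Free(dev_buf);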


@@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "migraphx_inc.h"
#include "hip_fence.h"
#include "gpu_data_transfer.h"
namespace onnxruntime {
HIPFence::HIPFence(const GPUDataTransfer* data_transfer) : data_transfer_(data_transfer) {
hipEventCreate(&read_event_);
hipEventCreate(&write_event_);
}
HIPFence::~HIPFence() {
hipEventDestroy(read_event_);
hipEventDestroy(write_event_);
}
void HIPFence::BeforeUsingAsInput(onnxruntime::ProviderType provider_type, int async_queue_id) {
(void)provider_type;
(void)async_queue_id;
// sync on CPU for all other providers, this is blocking
hipEventSynchronize(write_event_);
}
void HIPFence::BeforeUsingAsOutput(onnxruntime::ProviderType provider_type, int queue_id) {
(void)provider_type;
(void)queue_id;
// sync on CPU for all other providers, this is blocking
hipEventSynchronize(read_event_);
hipEventSynchronize(write_event_);
}
bool HIPFence::CanRelease() {
return hipEventQuery(read_event_) == hipSuccess &&
hipEventQuery(write_event_) == hipSuccess;
}
void HIPFence::AfterUsedAsInput(int queue_id) {
// update read fence
hipStream_t stream = data_transfer_->GetStream(queue_id);
hipEventRecord(read_event_, stream);
}
void HIPFence::AfterUsedAsOutput(int queue_id) {
// update write fence
hipStream_t stream = data_transfer_->GetStream(queue_id);
hipEventRecord(write_event_, stream);
}
} // namespace onnxruntime


@@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include "core/framework/tensor.h"
#include "core/graph/basic_types.h"
namespace onnxruntime {
class GPUDataTransfer;
class HIPFence : public IFence {
public:
HIPFence(const GPUDataTransfer* data_transfer);
virtual ~HIPFence();
virtual void BeforeUsingAsInput(onnxruntime::ProviderType provider_type, int queue_id) override;
virtual void BeforeUsingAsOutput(onnxruntime::ProviderType provider_type, int queue_id) override;
virtual void AfterUsedAsInput(int queue_id) override;
virtual void AfterUsedAsOutput(int queue_id) override;
virtual bool CanRelease() override;
private:
hipEvent_t read_event_;
hipEvent_t write_event_;
const GPUDataTransfer* data_transfer_;
};
} // namespace onnxruntime
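
A hedged sketch of the intended event protocol (the fence would come from HIPAllocator::CreateFence; queue ids are the HIPStreamType values from gpu_data_transfer.h):

// Producer: wait for all prior readers/writers, produce, then record the write.
fence->BeforeUsingAsOutput(onnxruntime::kMIGraphXExecutionProvider, kHipStreamDefault);
// ... kernel launch or hipMemcpyAsync that writes the tensor ...
fence->AfterUsedAsOutput(kHipStreamDefault);

// Consumer: block until the write event completes, read, then record the read.
fence->BeforeUsingAsInput(onnxruntime::kMIGraphXExecutionProvider, kHipStreamDefault);
// ... op that reads the tensor ...
fence->AfterUsedAsInput(kHipStreamDefault);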

File diff suppressed because it is too large.


@@ -0,0 +1,63 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
#pragma once
#include "core/framework/execution_provider.h"
#include "core/platform/ort_mutex.h"
#include <map>
#include "migraphx_inc.h"
namespace onnxruntime {
// Information needed to construct amdmigraphx execution providers.
struct MIGraphXExecutionProviderInfo {
std::string target_device;
int device_id {0};
};
// Information to construct kernel function state.
struct MIGraphXFuncState {
AllocateFunc allocate_func = nullptr;
DestroyFunc release_func = nullptr;
AllocatorHandle allocate_handle = nullptr;
migraphx::program prog{};
std::string onnx_string;
migraphx::onnx_options options;
migraphx::target t{};
std::unordered_map<std::string, std::size_t> input_name_indexes;
OrtMutex* mgx_mu_ptr = nullptr;
bool no_input_shape = false;
};
// Logical device representation.
class MIGraphXExecutionProvider : public IExecutionProvider {
public:
explicit MIGraphXExecutionProvider(const MIGraphXExecutionProviderInfo& info);
~MIGraphXExecutionProvider() = default;
std::vector<std::unique_ptr<ComputeCapability>>
GetCapability(const onnxruntime::GraphViewer& graph_viewer,
const std::vector<const KernelRegistry*>& kernel_registries) const override;
Status Compile(const std::vector<onnxruntime::Node*>& fused_nodes,
std::vector<NodeComputeInfo>& node_compute_funcs) override;
virtual std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;
std::unique_ptr<onnxruntime::IDataTransfer> GetDataTransfer() const override;
AllocatorPtr GetAllocator(int id, OrtMemType mem_type) const override;
private:
int device_id_;
migraphx::target t_;
OrtMutex mgx_mu_;
std::unordered_map<std::string, migraphx::program> map_progs_;
std::unordered_map<std::string, std::string> map_onnx_string_;
std::unordered_map<std::string, std::unordered_map<std::string, std::size_t>> map_input_index_;
std::unordered_map<std::string, bool> map_no_input_shape_;
AllocatorPtr allocator_;
};
}
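
Constructing the provider by hand mirrors what the factory later in this PR does; a minimal sketch:

onnxruntime::MIGraphXExecutionProviderInfo info;
info.device_id = 0;          // HIP device to run on
info.target_device = "gpu";  // MIGraphX compile target for the fused subgraphs
auto ep = std::make_unique<onnxruntime::MIGraphXExecutionProvider>(info);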


@@ -0,0 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
#pragma once
#include <hip/hip_runtime.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>


@@ -0,0 +1,36 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
#include "core/providers/migraphx/migraphx_provider_factory.h"
#include <atomic>
#include "migraphx_execution_provider.h"
#include "core/session/abi_session_options_impl.h"
using namespace onnxruntime;
namespace onnxruntime {
struct MIGraphXProviderFactory : IExecutionProviderFactory {
MIGraphXProviderFactory(int device_id) : device_id_(device_id) {}
~MIGraphXProviderFactory() = default;
std::unique_ptr<IExecutionProvider> CreateProvider() override {
MIGraphXExecutionProviderInfo info;
info.device_id = device_id_;
info.target_device = "gpu";
return std::make_unique<MIGraphXExecutionProvider>(info);
}
private:
int device_id_;
};
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id) {
return std::make_shared<onnxruntime::MIGraphXProviderFactory>(device_id);
}
} // namespace onnxruntime
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessionOptions* options, int device_id) {
options->provider_factories.push_back(onnxruntime::CreateExecutionProviderFactory_MIGraphX(device_id));
return nullptr;
}
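
The same factory is reachable from C++ without the C shim; a short sketch matching the test helper added later in this PR:

auto factory = onnxruntime::CreateExecutionProviderFactory_MIGraphX(0);
std::unique_ptr<onnxruntime::IExecutionProvider> ep = factory->CreateProvider();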


@@ -0,0 +1 @@
OrtSessionOptionsAppendExecutionProvider_MIGraphX


@@ -52,6 +52,12 @@
#define BACKEND_NGRAPH ""
#endif
#if USE_MIGRAPHX
#define BACKEND_MIGRAPHX "-MIGRAPHX"
#else
#define BACKEND_MIGRAPHX ""
#endif
#ifdef USE_OPENVINO
#if OPENVINO_CONFIG_CPU_FP32
#define BACKEND_OPENVINO "-OPENVINO_CPU_FP32"
@@ -94,7 +100,7 @@
#define BACKEND_OPENBLAS ""
#endif
#define BACKEND_DEVICE BACKEND_PROC BACKEND_DNNL BACKEND_MKLML BACKEND_NGRAPH BACKEND_NUPHAR BACKEND_OPENBLAS BACKEND_OPENVINO
#define BACKEND_DEVICE BACKEND_PROC BACKEND_DNNL BACKEND_MKLML BACKEND_NGRAPH BACKEND_OPENVINO BACKEND_NUPHAR BACKEND_OPENBLAS BACKEND_MIGRAPHX
#include "core/session/onnxruntime_cxx_api.h"
#include "core/providers/providers.h"
#include "core/providers/cpu/cpu_execution_provider.h"
@@ -109,6 +115,9 @@ onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExten
#ifdef USE_TENSORRT
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
#endif
#ifdef USE_MIGRAPHX
#include "core/providers/migraphx/migraphx_provider_factory.h"
#endif
#ifdef USE_NGRAPH
#include "core/providers/ngraph/ngraph_provider_factory.h"
#endif
@@ -130,6 +139,7 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CUDA(O
size_t cuda_mem_limit,
onnxruntime::ArenaExtendStrategy arena_extend_strategy);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_NGraph(const char* ng_backend_type);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device);
@@ -277,7 +287,7 @@ inline void RegisterExecutionProvider(InferenceSession* sess, onnxruntime::IExec
const std::vector<std::string>& GetAllProviders() {
static std::vector<std::string> all_providers = {kTensorrtExecutionProvider, kCudaExecutionProvider, kDnnlExecutionProvider,
kNGraphExecutionProvider, kOpenVINOExecutionProvider, kNupharExecutionProvider,
kVitisAIExecutionProvider, kCpuExecutionProvider};
kVitisAIExecutionProvider, kCpuExecutionProvider, kMIGraphXExecutionProvider};
return all_providers;
}
@@ -287,6 +297,9 @@ const std::vector<std::string>& GetAvailableProviders() {
#ifdef USE_TENSORRT
available_providers.push_back(kTensorrtExecutionProvider);
#endif
#ifdef USE_MIGRAPHX
available_providers.push_back(kMIGraphXExecutionProvider);
#endif
#ifdef USE_CUDA
available_providers.push_back(kCudaExecutionProvider);
#endif
@@ -318,6 +331,10 @@ void RegisterExecutionProviders(InferenceSession* sess, const std::vector<std::s
} else if (type == kTensorrtExecutionProvider) {
#ifdef USE_TENSORRT
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(0));
#endif
} else if (type == kMIGraphXExecutionProvider) {
#ifdef USE_MIGRAPHX
RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_MIGraphX(0));
#endif
} else if (type == kCudaExecutionProvider) {
#ifdef USE_CUDA
@@ -434,7 +451,10 @@ void addGlobalMethods(py::module& m, const Environment& env) {
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device),
#endif
#ifdef USE_TENSORRT
onnxruntime::CreateExecutionProviderFactory_Tensorrt(0)
onnxruntime::CreateExecutionProviderFactory_Tensorrt(0),
#endif
#ifdef USE_MIGRAPHX
onnxruntime::CreateExecutionProviderFactory_MIGraphX(0)
#endif
#ifdef USE_VITISAI
onnxruntime::CreateExecutionProviderFactory_VitisAI("DPU", 0),


@@ -18,7 +18,7 @@ TEST(RecordTest, CommonDataStructureTest) {
std::tuple<std::string, float> values("streamLength", 2.0f);
Record<std::string, float> record(names, values);
const std::string* name;
const std::string* name = nullptr;
auto status = record.GetName(2, &name);
EXPECT_FALSE(status.IsOK());


@@ -37,7 +37,7 @@ void usage() {
"\t-v: verbose\n"
"\t-n [test_case_name]: Specifies a single test case to run.\n"
"\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', 'ngraph', "
"'openvino', 'nuphar', or 'acl'. "
"'openvino', 'nuphar', 'migraphx' or 'acl'. "
"Default: 'cpu'.\n"
"\t-x: Use parallel executor, default (without -x): sequential executor.\n"
"\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n"
@@ -101,6 +101,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
bool enable_nnapi = false;
bool enable_dml = false;
bool enable_acl = false;
bool enable_migraphx = false;
int device_id = 0;
GraphOptimizationLevel graph_optimization_level = ORT_ENABLE_ALL;
bool user_graph_optimization_level_set = false;
@@ -167,7 +168,9 @@
enable_dml = true;
} else if (!CompareCString(optarg, ORT_TSTR("acl"))) {
enable_acl = true;
} else {
} else if (!CompareCString(optarg, ORT_TSTR("migraphx"))) {
enable_migraphx = true;
} else {
usage();
return -1;
}
@@ -363,6 +366,14 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
return -1;
#endif
}
if (enable_migraphx) {
#ifdef USE_MIGRAPHX
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(sf, device_id));
#else
fprintf(stderr, "MIGRAPHX is not supported in this build");
return -1;
#endif
}
if (user_graph_optimization_level_set) {
sf.SetGraphOptimizationLevel(graph_optimization_level);


@@ -85,6 +85,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0));
#else
ORT_THROW("Acl is not supported in this build\n");
#endif
} else if (provider_name == onnxruntime::kMIGraphXExecutionProvider) {
#ifdef USE_MIGRAPHX
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(session_options, 0));
#else
ORT_THROW("MIGraphX is not supported in this build\n");
#endif
} else if (!provider_name.empty() && provider_name != onnxruntime::kCpuExecutionProvider) {
ORT_THROW("This backend is not included in perf test runner.\n");


@@ -17,7 +17,6 @@ pytest_plugins = 'onnx.backend.test.report',
class OrtBackendTest(onnx.backend.test.BackendTest):
def __init__(self, backend, parent_module=None):
super(OrtBackendTest, self).__init__(backend, parent_module)
@@ -42,9 +41,11 @@ def create_backend_test(testname=None):
backend_test.include(testname + '.*')
else:
# read filters data
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'testdata', 'onnx_backend_test_series_filters.jsonc')) as f:
with open(
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'testdata',
'onnx_backend_test_series_filters.jsonc')) as f:
filters_lines = f.readlines()
filters_lines = [x.split('//')[0] for x in filters_lines]
filters_lines = [x.split('//')[0] for x in filters_lines]
filters = json.loads('\n'.join(filters_lines))
current_failing_tests = filters['current_failing_tests']
@@ -70,11 +71,23 @@
if c2.supports_device('OPENVINO_CPU_FP32'):
current_failing_tests += filters['current_failing_tests_OPENVINO_CPU_FP32']
if c2.supports_device('MIGRAPHX'):
current_failing_tests += [
'^test_constant_pad_cpu', '^test_softmax_axis_1_cpu', '^test_softmax_axis_0_cpu',
'^test_softmax_default_axis_cpu', '^test_round_cpu', '^test_lrn_default_cpu', '^test_lrn_cpu',
'^test_logsoftmax_axis_0_cpu', '^test_logsoftmax_axis_1_cpu', '^test_logsoftmax_default_axis_cpu',
'^test_dynamicquantizelinear_expanded_cpu', '^test_dynamicquantizelinear_max_adjusted_cpu',
'^test_dynamicquantizelinear_max_adjusted_expanded_cpu', '^test_dynamicquantizelinear_min_adjusted_cpu',
'^test_dynamicquantizelinear_min_adjusted_expanded_cpu',
'^test_range_float_type_positive_delta_expanded_cpu',
'^test_range_int32_type_negative_delta_expanded_cpu', '^test_operator_symbolic_override_nested_cpu'
]
filters = current_failing_tests + \
filters['tests_with_pre_opset7_dependencies'] + \
filters['unsupported_usages'] + \
filters['failing_permanently'] + \
filters['test_with_types_disabled_due_to_binary_size_concerns']
filters['tests_with_pre_opset7_dependencies'] + \
filters['unsupported_usages'] + \
filters['failing_permanently'] + \
filters['test_with_types_disabled_due_to_binary_size_concerns']
backend_test.exclude('(' + '|'.join(filters) + ')')
print('excluded tests:', filters)
@@ -92,7 +105,8 @@ def parse_args():
# Add an argument to match a single test name, by adding the name to the 'include' filter.
# Using -k with python unittest (https://docs.python.org/3/library/unittest.html#command-line-options)
# doesn't work as it filters on the test method name (Runner._add_model_test) rather than individual test case names.
# doesn't work as it filters on the test method name (Runner._add_model_test) rather than individual
# test case names.
parser.add_argument(
'-t',
'--test-name',


@@ -20,6 +20,7 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ACL(int use_arena);
namespace test {
@@ -36,6 +37,14 @@ std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider() {
#endif
}
std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider() {
#ifdef USE_MIGRAPHX
return CreateExecutionProviderFactory_MIGraphX(0)->CreateProvider();
#else
return nullptr;
#endif
}
std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
#ifdef USE_OPENVINO
return CreateExecutionProviderFactory_OpenVINO("")->CreateProvider();


@@ -13,6 +13,7 @@ std::unique_ptr<IExecutionProvider> DefaultDnnlExecutionProvider(bool enable_are
std::unique_ptr<IExecutionProvider> DefaultNGraphExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultNupharExecutionProvider(bool allow_unaligned_buffers = true);
std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider();
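
A hedged sketch of how an operator test could opt into the new default provider (the OpTester names are assumptions drawn from the existing test framework, not from this diff):

std::vector<std::unique_ptr<onnxruntime::IExecutionProvider>> eps;
if (auto ep = onnxruntime::test::DefaultMIGraphXExecutionProvider())  // nullptr without USE_MIGRAPHX
  eps.push_back(std::move(ep));
// Hypothetical single-op test routed through MIGraphX:
// OpTester test("Relu", 7);
// test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &eps);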


@@ -31,3 +31,7 @@
#ifdef USE_ACL
#include "core/providers/acl/acl_provider_factory.h"
#endif
#ifdef USE_MIGRAPHX
#include "core/providers/migraphx/migraphx_provider_factory.h"
#endif


@@ -22,3 +22,6 @@
#ifdef USE_DML
#include "onnxruntime/core/providers/dml/dml_provider_factory.h"
#endif
#ifdef USE_MIGRAPHX
#include "onnxruntime/core/providers/migraphx/migraphx_provider_factory.h"
#endif

@@ -1 +1 @@
Subproject commit 352281313fe1c4313bc222cb9de222afd50c822f
Subproject commit 23f0cdf9014650c79e214c2d0e935ab0f8821cc5


@@ -284,6 +284,10 @@ def parse_arguments():
"--use_tensorrt", action='store_true', help="Build with TensorRT")
parser.add_argument(
"--tensorrt_home", help="Path to TensorRT installation dir")
parser.add_argument(
"--use_migraphx", action='store_true', help="Build with MIGraphX")
parser.add_argument(
"--migraphx_home", help="Path to MIGraphX installation dir")
parser.add_argument(
"--use_full_protobuf", action='store_true',
help="Use the full protobuf library")
@@ -509,7 +513,7 @@ def setup_test_data(build_dir, configs):
def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home,
cudnn_home, tensorrt_home, path_to_protoc_exe, configs,
cudnn_home, tensorrt_home, migraphx_home, path_to_protoc_exe, configs,
cmake_extra_defines, args, cmake_extra_args):
log.info("Generating CMake build tree")
cmake_dir = os.path.join(source_dir, "cmake")
@@ -582,6 +586,9 @@
"-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
"-Donnxruntime_TENSORRT_HOME=" + (
tensorrt_home if args.use_tensorrt else ""),
# set vars for migraphx
"-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
"-Donnxruntime_MIGRAPHX_HOME=" + (migraphx_home if args.use_migraphx else ""),
# By default - we currently support only cross compiling for
# ARM/ARM64 (no native compilation supported through this
# script).
@@ -994,6 +1001,23 @@ def setup_tensorrt_vars(args):
return tensorrt_home
def setup_migraphx_vars(args):
migraphx_home = None
if (args.use_migraphx):
print("migraphx_home = {}".format(args.migraphx_home))
migraphx_home = args.migraphx_home or os.getenv("MIGRAPHX_HOME") or None
migraphx_home_not_valid = (migraphx_home and not os.path.exists(migraphx_home))
if (migraphx_home_not_valid):
raise BuildError("migraphx_home paths must be specified and valid.",
"migraphx_home='{}' valid={}."
.format(migraphx_home, migraphx_home_not_valid))
return migraphx_home or ''
def setup_dml_build(args, cmake_path, build_dir, configs):
if args.use_dml:
for config in configs:
@@ -1561,6 +1585,9 @@ def main():
# if using tensorrt, setup tensorrt paths
tensorrt_home = setup_tensorrt_vars(args)
# if using migraphx, setup migraphx paths
migraphx_home = setup_migraphx_vars(args)
os.makedirs(build_dir, exist_ok=True)
log.info("Build started")
@@ -1645,7 +1672,7 @@
setup_test_data(build_dir, configs)
generate_build_tree(
cmake_path, source_dir, build_dir, cuda_home, cudnn_home,
tensorrt_home, path_to_protoc_exe, configs, cmake_extra_defines,
tensorrt_home, migraphx_home, path_to_protoc_exe, configs, cmake_extra_defines,
args, cmake_extra_args)
if args.clean: