mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
Enable GPU multi-device test for CUDA EP and TensorRT EP
Enable multi-device test for GPU * Add build pipeline for TensorRT multi-GPU test * Add code to disable fp16 tests if the hardware architecture does not support them * Add option to set the device id in onnx_test_runner for model tests
This commit is contained in:
parent
f93be8af90
commit
640f71c91b
6 changed files with 54 additions and 12 deletions
|
|
@ -43,6 +43,7 @@ void usage() {
|
|||
"'openvino' or 'nuphar'. "
|
||||
"Default: 'cpu'.\n"
|
||||
"\t-x: Use parallel executor, default (without -x): sequential executor.\n"
|
||||
"\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n"
|
||||
"\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
|
||||
"\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. "
|
||||
"\n"
|
||||
|
|
@ -101,13 +102,14 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
bool enable_mem_pattern = true;
|
||||
bool enable_openvino = false;
|
||||
bool enable_nnapi = false;
|
||||
int device_id = 0;
|
||||
GraphOptimizationLevel graph_optimization_level = ORT_DISABLE_ALL;
|
||||
bool user_graph_optimization_level_set = false;
|
||||
|
||||
OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_WARNING;
|
||||
{
|
||||
int ch;
|
||||
while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:xvo:"))) != -1) {
|
||||
while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:xvo:d:"))) != -1) {
|
||||
switch (ch) {
|
||||
case 'A':
|
||||
enable_cpu_mem_arena = false;
|
||||
|
|
@ -197,6 +199,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
user_graph_optimization_level_set = true;
|
||||
break;
|
||||
}
|
||||
case 'd':
|
||||
device_id = static_cast<int>(OrtStrtol<PATH_CHAR_TYPE>(optarg, nullptr));
|
||||
if (device_id < 0) {
|
||||
usage();
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
case 'h':
|
||||
default:
|
||||
|
|
@ -251,8 +260,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
|
||||
if (enable_tensorrt) {
|
||||
#ifdef USE_TENSORRT
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, 0));
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id));
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, device_id));
|
||||
#else
|
||||
fprintf(stderr, "TensorRT is not supported in this build");
|
||||
return -1;
|
||||
|
|
@ -269,7 +278,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
|
|||
}
|
||||
if (enable_cuda) {
|
||||
#ifdef USE_CUDA
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
|
||||
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, device_id));
|
||||
#else
|
||||
fprintf(stderr, "CUDA is not supported in this build");
|
||||
return -1;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "gtest/gtest.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
#include "test/common/cuda_op_test_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
|
@ -29,6 +30,11 @@ TEST(GemmOpTest, GemmNoTrans) {
|
|||
// Only CUDA kernel has float 16 support
|
||||
#ifdef USE_CUDA
|
||||
TEST(GemmOpTest, GemmNoTrans_f16) {
|
||||
int min_cuda_architecture = 530;
|
||||
if (!HasCudaEnvironment(min_cuda_architecture)) {
|
||||
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
|
||||
return;
|
||||
}
|
||||
OpTester test("Gemm");
|
||||
|
||||
test.AddAttribute("transA", (int64_t)0);
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "core/providers/cpu/nn/pool.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "test/providers/provider_test_utils.h"
|
||||
#include "test/common/cuda_op_test_utils.h"
|
||||
using namespace std;
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
|
@ -58,6 +59,11 @@ TEST(PoolTest, MaxPool) {
|
|||
// Disable for now, still investigating the issue with cudnn lib
|
||||
#ifdef USE_CUDA
|
||||
TEST(PoolTest, MaxPool_F16) {
|
||||
int min_cuda_architecture = 530;
|
||||
if (!HasCudaEnvironment(min_cuda_architecture)) {
|
||||
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
|
||||
return;
|
||||
}
|
||||
OpTester test("MaxPool");
|
||||
|
||||
test.AddAttribute("auto_pad", "");
|
||||
|
|
|
|||
|
|
@ -158,6 +158,7 @@ Use the individual flags to only run the specified stages.
|
|||
parser.add_argument("--enable_language_interop_ops", action='store_true', help="Enable operator implemented in language other than cpp")
|
||||
parser.add_argument("--cmake_generator", choices=['Visual Studio 15 2017', 'Visual Studio 16 2019'],
|
||||
default='Visual Studio 15 2017', help="Specify the generator that CMake invokes. This is only supported on Windows")
|
||||
parser.add_argument("--enable_multi_device_test", action='store_true', help="Test with multi-device. Mostly used for multi-device GPU")
|
||||
return parser.parse_args()
|
||||
|
||||
def resolve_executable_path(command_or_path):
|
||||
|
|
@ -608,7 +609,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs, enab
|
|||
if onnxml_test:
|
||||
run_subprocess([sys.executable, 'onnxruntime_test_python_keras.py'], cwd=cwd, dll_path=dll_path)
|
||||
|
||||
def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_parallel_executor_test, num_parallel_models):
|
||||
def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_multi_device_test, enable_parallel_executor_test, num_parallel_models):
|
||||
for config in configs:
|
||||
cwd = get_config_build_dir(build_dir, config)
|
||||
if is_windows():
|
||||
|
|
@ -630,6 +631,9 @@ def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_para
|
|||
if num_parallel_models > 0:
|
||||
cmd += ["-j", str(num_parallel_models)]
|
||||
|
||||
if enable_multi_device_test:
|
||||
cmd += ['-d', '1']
|
||||
|
||||
if config != 'Debug' and os.path.exists(model_dir):
|
||||
# some models in opset9 and above are not supported by TensorRT yet
|
||||
if provider == 'tensorrt':
|
||||
|
|
@ -975,20 +979,20 @@ def main():
|
|||
# Disable some onnx unit tests that TensorRT doesn't support yet
|
||||
if not is_windows():
|
||||
onnx_test_data_dir = os.path.join(source_dir, "cmake", "external", "onnx", "onnx", "backend", "test", "data", "simple")
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'tensorrt', False, 1)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'tensorrt', args.enable_multi_device_test, False, 1)
|
||||
elif args.use_cuda:
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'cuda', False, 2)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'cuda', args.enable_multi_device_test, False, 2)
|
||||
elif args.x86 or platform.system() == 'Darwin':
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, False, 1)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, args.enable_multi_device_test, False, 1)
|
||||
elif args.use_ngraph:
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'ngraph', True, 1)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'ngraph', args.enable_multi_device_test, True, 1)
|
||||
elif args.use_openvino:
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'openvino', False, 1)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'openvino', args.enable_multi_device_test, False, 1)
|
||||
# TODO: parallel executor test fails on MacOS
|
||||
elif args.use_nuphar:
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'nuphar', False, 1)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'nuphar', args.enable_multi_device_test, False, 1)
|
||||
else:
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, True, 0)
|
||||
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, args.enable_multi_device_test, True, 0)
|
||||
|
||||
if args.use_mkldnn:
|
||||
mkldnn_run_onnx_tests(build_dir, configs, onnx_test_data_dir)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
jobs:
|
||||
- template: templates/linux-ci.yml
|
||||
parameters:
|
||||
AgentPool : 'Linux-Multi-GPU'
|
||||
JobName: 'Linux_CI_Multi_GPU_Dev'
|
||||
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) -x "--enable_multi_device_test"'
|
||||
DoNugetPack: 'false'
|
||||
ArtifactName: 'drop-linux'
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
jobs:
|
||||
- template: templates/linux-ci.yml
|
||||
parameters:
|
||||
AgentPool : 'Linux-Multi-GPU'
|
||||
JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev'
|
||||
# The latest TensorRT container (R19.09) only supports ubuntu18.04
|
||||
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"'
|
||||
DoNugetPack: 'false'
|
||||
ArtifactName: 'drop-linux'
|
||||
Loading…
Reference in a new issue