Enable GPU multi-device test for CUDA EP and TensorRT EP

Enable multi-device test for GPU
* Add build pipeline for TensorRT multi-GPU test
* Add code to disable fp16 tests if the hardware architecture is not supported
* Add option to set the device id in onnx_test_runner for model tests
This commit is contained in:
Hector Li 2019-10-14 11:16:34 -07:00 committed by GitHub
parent f93be8af90
commit 640f71c91b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 12 deletions

View file

@ -43,6 +43,7 @@ void usage() {
"'openvino' or 'nuphar'. "
"Default: 'cpu'.\n"
"\t-x: Use parallel executor, default (without -x): sequential executor.\n"
"\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n"
"\t-o [optimization level]: Default is 1. Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all).\n"
"\t\tPlease see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. "
"\n"
@ -101,13 +102,14 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
bool enable_mem_pattern = true;
bool enable_openvino = false;
bool enable_nnapi = false;
int device_id = 0;
GraphOptimizationLevel graph_optimization_level = ORT_DISABLE_ALL;
bool user_graph_optimization_level_set = false;
OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_WARNING;
{
int ch;
while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:xvo:"))) != -1) {
while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:xvo:d:"))) != -1) {
switch (ch) {
case 'A':
enable_cpu_mem_arena = false;
@ -197,6 +199,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
user_graph_optimization_level_set = true;
break;
}
case 'd':
device_id = static_cast<int>(OrtStrtol<PATH_CHAR_TYPE>(optarg, nullptr));
if (device_id < 0) {
usage();
return -1;
}
break;
case '?':
case 'h':
default:
@ -251,8 +260,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
if (enable_tensorrt) {
#ifdef USE_TENSORRT
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, 0));
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id));
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, device_id));
#else
fprintf(stderr, "TensorRT is not supported in this build");
return -1;
@ -269,7 +278,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
}
if (enable_cuda) {
#ifdef USE_CUDA
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, device_id));
#else
fprintf(stderr, "CUDA is not supported in this build");
return -1;

View file

@ -3,6 +3,7 @@
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"
#include "test/common/cuda_op_test_utils.h"
namespace onnxruntime {
namespace test {
@ -29,6 +30,11 @@ TEST(GemmOpTest, GemmNoTrans) {
// Only CUDA kernel has float 16 support
#ifdef USE_CUDA
TEST(GemmOpTest, GemmNoTrans_f16) {
int min_cuda_architecture = 530;
if (!HasCudaEnvironment(min_cuda_architecture)) {
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
return;
}
OpTester test("Gemm");
test.AddAttribute("transA", (int64_t)0);

View file

@ -4,6 +4,7 @@
#include "core/providers/cpu/nn/pool.h"
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"
#include "test/common/cuda_op_test_utils.h"
using namespace std;
namespace onnxruntime {
namespace test {
@ -58,6 +59,11 @@ TEST(PoolTest, MaxPool) {
// Disable for now, still investigating the issue with cudnn lib
#ifdef USE_CUDA
TEST(PoolTest, MaxPool_F16) {
int min_cuda_architecture = 530;
if (!HasCudaEnvironment(min_cuda_architecture)) {
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
return;
}
OpTester test("MaxPool");
test.AddAttribute("auto_pad", "");

View file

@ -158,6 +158,7 @@ Use the individual flags to only run the specified stages.
parser.add_argument("--enable_language_interop_ops", action='store_true', help="Enable operator implemented in language other than cpp")
parser.add_argument("--cmake_generator", choices=['Visual Studio 15 2017', 'Visual Studio 16 2019'],
default='Visual Studio 15 2017', help="Specify the generator that CMake invokes. This is only supported on Windows")
parser.add_argument("--enable_multi_device_test", action='store_true', help="Test with multi-device. Mostly used for multi-device GPU")
return parser.parse_args()
def resolve_executable_path(command_or_path):
@ -608,7 +609,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs, enab
if onnxml_test:
run_subprocess([sys.executable, 'onnxruntime_test_python_keras.py'], cwd=cwd, dll_path=dll_path)
def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_parallel_executor_test, num_parallel_models):
def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_multi_device_test, enable_parallel_executor_test, num_parallel_models):
for config in configs:
cwd = get_config_build_dir(build_dir, config)
if is_windows():
@ -630,6 +631,9 @@ def run_onnx_tests(build_dir, configs, onnx_test_data_dir, provider, enable_para
if num_parallel_models > 0:
cmd += ["-j", str(num_parallel_models)]
if enable_multi_device_test:
cmd += ['-d', '1']
if config != 'Debug' and os.path.exists(model_dir):
# some models in opset9 and above are not supported by TensorRT yet
if provider == 'tensorrt':
@ -975,20 +979,20 @@ def main():
# Disable some onnx unit tests that TensorRT doesn't support yet
if not is_windows():
onnx_test_data_dir = os.path.join(source_dir, "cmake", "external", "onnx", "onnx", "backend", "test", "data", "simple")
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'tensorrt', False, 1)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'tensorrt', args.enable_multi_device_test, False, 1)
elif args.use_cuda:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'cuda', False, 2)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'cuda', args.enable_multi_device_test, False, 2)
elif args.x86 or platform.system() == 'Darwin':
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, False, 1)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, args.enable_multi_device_test, False, 1)
elif args.use_ngraph:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'ngraph', True, 1)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'ngraph', args.enable_multi_device_test, True, 1)
elif args.use_openvino:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'openvino', False, 1)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'openvino', args.enable_multi_device_test, False, 1)
# TODO: parallel executor test fails on MacOS
elif args.use_nuphar:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'nuphar', False, 1)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, 'nuphar', args.enable_multi_device_test, False, 1)
else:
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, True, 0)
run_onnx_tests(build_dir, configs, onnx_test_data_dir, None, args.enable_multi_device_test, True, 0)
if args.use_mkldnn:
mkldnn_run_onnx_tests(build_dir, configs, onnx_test_data_dir)

View file

@ -0,0 +1,8 @@
# CI job definition for the Linux multi-GPU build; instantiates templates/linux-ci.yml.
# The build command runs run_dockerbuild.sh with the 'gpu' device option on ubuntu16.04
# and supplies "--enable_multi_device_test" via -x
# (presumably forwarded to the build script as an extra argument — confirm in run_dockerbuild.sh).
jobs:
- template: templates/linux-ci.yml
parameters:
AgentPool : 'Linux-Multi-GPU'
JobName: 'Linux_CI_Multi_GPU_Dev'
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) -x "--enable_multi_device_test"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'

View file

@ -0,0 +1,9 @@
# CI job definition for the Linux multi-GPU TensorRT build; instantiates templates/linux-ci.yml.
# The build command runs run_dockerbuild.sh with the 'tensorrt' device option and
# supplies "--enable_multi_device_test" via -x
# (presumably forwarded to the build script as an extra argument — confirm in run_dockerbuild.sh).
jobs:
- template: templates/linux-ci.yml
parameters:
AgentPool : 'Linux-Multi-GPU'
JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev'
# The latest TensorRT container (R19.09) only supports ubuntu18.04
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'