From 60d71d63b58e5aaf7ec7c12567be5dcf015d254b Mon Sep 17 00:00:00 2001
From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com>
Date: Tue, 16 Apr 2019 15:47:52 -0700
Subject: [PATCH] Rashuai/onnx test reduce mem (#790)

* define new test load function
* remove bak file
* add stat operator
* add arguments
* fix comments
* try enable fp16_tiny_yolov2 on linux
* fix compile err
* try enable fp16_tiny_yolov2
---
 onnxruntime/test/onnx/TestResultStat.h | 26 ++++++++++
 onnxruntime/test/onnx/main.cc          | 72 +++++++++++++--------------
 onnxruntime/test/onnx/runner.cc        | 11 ++--
 onnxruntime/test/onnx/runner.h         |  8 +--
 4 files changed, 72 insertions(+), 45 deletions(-)

diff --git a/onnxruntime/test/onnx/TestResultStat.h b/onnxruntime/test/onnx/TestResultStat.h
index 12c4174c25..e289be3b4b 100644
--- a/onnxruntime/test/onnx/TestResultStat.h
+++ b/onnxruntime/test/onnx/TestResultStat.h
@@ -45,6 +45,32 @@ class TestResultStat {

   std::string ToString();

+  TestResultStat& operator += (const TestResultStat& result) {  // Adds result's counters and name sets to *this; returns *this.
+    total_test_case_count += result.total_test_case_count;
+    total_model_count += result.total_model_count;
+    succeeded += result.succeeded;
+    not_implemented += result.not_implemented;
+    load_model_failed += result.load_model_failed;
+    throwed_exception += result.throwed_exception;
+    result_differs += result.result_differs;
+    skipped += result.skipped;
+    invalid_graph += result.invalid_graph;
+
+    for(const auto& s:result.not_implemented_kernels) {
+      AddNotImplementedKernels(s);
+    }
+
+    for(const auto& s:result.failed_kernels) {  // failed kernels go to the failed-kernel set, not the not-implemented set
+      AddFailedKernels(s);
+    }
+
+    for(const auto& s:result.failed_test_cases) {  // failed test names go to the failed-test set, not the not-implemented set
+      AddFailedTest(s);
+    }
+
+    return *this;
+  }
+
  private:
   onnxruntime::OrtMutex m_;
   std::unordered_set not_implemented_kernels;
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 4152c2aa8f..250c72318c 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -181,8 +181,6 @@ int real_main(int 
argc, char* argv[], OrtEnv** p_env) { double per_sample_tolerance = 1e-3; // when cuda is enabled, set it to a larger value for resolving random MNIST test failure double relative_per_sample_tolerance = enable_cuda ? 0.017 : 1e-3; - std::vector tests = - LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance); SessionOptionsWrapper sf(env); if (enable_cpu_mem_arena) sf.EnableCpuMemArena(); @@ -204,23 +202,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) { if (enable_cuda) { #ifdef USE_CUDA ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0)); - // Filter out some flaky tests from cuda test runs. Those tests - // caused random segfault in CUDA 9.1. - // TODO: remove this list once we fully moved to CUDA10 - // clang-format off - std::unordered_set cuda_flaky_tests = { - "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2" - }; - for (auto it = tests.begin(); it != tests.end();) { - auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName()); - if (iter != cuda_flaky_tests.end()) { - delete *it; - it = tests.erase(it); - } - else { - ++it; - } - } #else fprintf(stderr, "CUDA is not supported in this build"); return -1; @@ -243,6 +224,37 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) { #endif } + std::unordered_set cuda_flaky_tests = { + "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"}; + +#if (defined (_WIN32) && !defined(_WIN64)) || (defined(__GNUG__) && !defined(__LP64__)) + //Minimize mem consumption /* in builds matching the #if above (Windows without _WIN64, or GNU C++ without __LP64__) each test case is run inside the LoadTests callback and released before the callback returns, instead of being collected into a vector first */ + LoadTests (data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&stat, &sf, enable_cuda, &cuda_flaky_tests] (ITestCase* l) { + std::unique_ptr test_case_ptr(l); /* test_case_ptr owns l from here on, so the test case is destroyed when this callback returns, including the early return taken for flaky CUDA tests */ + if (enable_cuda && cuda_flaky_tests.find(l->GetTestCaseName()) != cuda_flaky_tests.end()) { + return; + } + TestResultStat per_case_stat; + std::vector per_case_tests = {l}; + TestEnv per_case_args(per_case_tests, per_case_stat, sf); + /* NOTE(review): the Status returned by this RunTests call is discarded, whereas the RunTests call in the #else branch stores it in st and that path prints st.ErrorMessage() and returns -1; the literal 1, 1, 1 also takes the place of p_models, concurrent_session_runs and repeat_count used in the #else branch -- confirm both are intended */ RunTests(per_case_args, 1, 1, 1, 
GetDefaultThreadPool(Env::Default())); + stat += per_case_stat; /* fold this single test case's results into the overall stat that is printed after the #endif */ + }); +#else + std::vector tests; + LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&tests] (ITestCase* l) { tests.push_back(l); }); /* collect the raw pointers; they are deleted either by the CUDA filter loop below or by the delete loop after RunTests. NOTE(review): the return -1 after the error print comes before that delete loop, so it looks like the test cases are not freed on that path -- confirm */ + if (enable_cuda) { + for (auto it = tests.begin(); it != tests.end();) { + auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName()); + if (iter != cuda_flaky_tests.end()) { + delete *it; + it = tests.erase(it); + } + else { + ++it; + } + } + } TestEnv args(tests, stat, sf); Status st = RunTests(args, p_models, concurrent_session_runs, static_cast(repeat_count), GetDefaultThreadPool(Env::Default())); @@ -250,12 +262,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) { fprintf(stderr, "%s\n", st.ErrorMessage().c_str()); return -1; } - - std::string res = stat.ToString(); - fwrite(res.c_str(), 1, res.size(), stdout); for (ITestCase* l : tests) { delete l; } +#endif + std::string res = stat.ToString(); + fwrite(res.c_str(), 1, res.size(), stdout); } // clang-format off std::map broken_tests{ @@ -348,24 +360,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) { #endif // clang-format on -#ifdef _WIN32 - broken_tests["tf_inception_resnet_v2"] = "failed: type mismatch"; - broken_tests["tf_inception_v4"] = "failed: type mismatch"; - broken_tests["tf_resnet_v1_101"] = "failed: type mismatch"; - broken_tests["tf_resnet_v1_152"] = "failed: type mismatch"; - broken_tests["tf_resnet_v2_101"] = "failed: type mismatch"; - broken_tests["tf_resnet_v2_152"] = "failed: type mismatch"; +#if defined (_WIN32) && !defined(_WIN64) broken_tests["vgg19"] = "failed: bad allocation"; - broken_tests["tf_nasnet_large"] = "failed: bad allocation"; - broken_tests["tf_pnasnet_large"] = "failed: bad allocation"; - broken_tests["zfnet512"] = "failed: bad allocation"; #endif -#ifdef __GNUG__ -#ifndef __LP64__ +#if defined(__GNUG__) && !defined(__LP64__) broken_tests["nonzero_example"] = "failed: type mismatch"; - 
broken_tests["fp16_tiny_yolov2"] = "Need to adjust the per_sample_tolerance: 0.2"; -#endif #endif int result = 0; diff --git a/onnxruntime/test/onnx/runner.cc b/onnxruntime/test/onnx/runner.cc index b80f01793c..881385fd4f 100644 --- a/onnxruntime/test/onnx/runner.cc +++ b/onnxruntime/test/onnx/runner.cc @@ -242,10 +242,10 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c return common::Status::OK(); } -std::vector LoadTests(const std::vector>& input_paths, - const std::vector>& whitelisted_test_cases, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance) { - std::vector tests; +void LoadTests(const std::vector>& input_paths, + const std::vector>& whitelisted_test_cases, + double default_per_sample_tolerance, double default_relative_per_sample_tolerance, + const std::function& process_function) { /* process_function replaces the returned vector of the old signature; judging by the two callers in main.cc it is handed a raw ITestCase* and is then responsible for deleting it -- TODO confirm against the loop body, which is not visible here */ std::vector> paths(input_paths); while (!paths.empty()) { std::basic_string node_data_root_path = paths.back(); @@ -271,11 +271,10 @@ std::vector LoadTests(const std::vector /* NOTE(review): patch text appears to be missing at this point -- the remainder of this runner.cc hunk and the diff header for runner.h (which the diffstat lists) are absent, and template argument lists look stripped throughout this copy */ LoadTests(const std::vector>& input_paths, - const std::vector>& whitelisted_test_cases, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance); +void LoadTests(const std::vector>& input_paths, + const std::vector>& whitelisted_test_cases, + double default_per_sample_tolerance, double default_relative_per_sample_tolerance, + const std::function& process_function); + //Do not run this function in the thread pool passed in ::onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool); EXECUTE_RESULT StatusCodeToExecuteResult(int input);