Rashuai/onnx test reduce mem (#790)

* define new test load function * remove bak file * add stat operator * add arguments * fix comments * try enable fp16_tiny_yolov2 on linux * fix compile err * try enable fp16_tiny_yolov2
2026-07-30 20:18:08 +00:00 · 2019-04-16 15:47:52 -07:00 · 2019-04-16 15:47:52 -07:00 · 60d71d63b5
commit 60d71d63b5
parent 3a8b9a4918
4 changed files with 72 additions and 45 deletions
--- a/onnxruntime/test/onnx/TestResultStat.h
+++ b/onnxruntime/test/onnx/TestResultStat.h
@ -45,6 +45,32 @@ class TestResultStat {

  std::string ToString();

+  TestResultStat& operator += (const TestResultStat& result) {  // Accumulates result's counters and name sets into *this.
+    total_test_case_count += result.total_test_case_count;
+    total_model_count     += result.total_model_count;
+    succeeded             += result.succeeded;
+    not_implemented       += result.not_implemented;
+    load_model_failed     += result.load_model_failed;
+    throwed_exception     += result.throwed_exception;
+    result_differs        += result.result_differs;
+    skipped               += result.skipped;
+    invalid_graph         += result.invalid_graph;
+    // Merge each name set through its own Add* helper so entries land in the matching set of *this.
+    for(const auto& s:result.not_implemented_kernels) {
+        AddNotImplementedKernels(s);
+    }
+    // Failed kernels belong in failed_kernels; they must not be reported as "not implemented".
+    for(const auto& s:result.failed_kernels) {
+        AddFailedKernels(s);
+    }
+    // Failed test case names belong in failed_test_cases.
+    for(const auto& s:result.failed_test_cases) {
+        AddFailedTest(s);
+    }
+
+    return *this;
+  }
+
 private:
  onnxruntime::OrtMutex m_;
  std::unordered_set<std::string> not_implemented_kernels;
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@ -181,8 +181,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
    double per_sample_tolerance = 1e-3;
    // when cuda is enabled, set it to a larger value for resolving random MNIST test failure
    double relative_per_sample_tolerance = enable_cuda ? 0.017 : 1e-3;
-    std::vector<ITestCase*> tests =
-        LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance);
    SessionOptionsWrapper sf(env);
    if (enable_cpu_mem_arena)
      sf.EnableCpuMemArena();
@ -204,23 +202,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
    if (enable_cuda) {
 #ifdef USE_CUDA
      ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
-      // Filter out some flaky tests from cuda test runs. Those tests
-      // caused random segfault in CUDA 9.1. 
-      // TODO: remove this list once we fully moved to CUDA10
-      // clang-format off
-      std::unordered_set<std::string> cuda_flaky_tests = {
-        "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"
-      };
-      for (auto it = tests.begin(); it != tests.end();) {
-        auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
-        if (iter != cuda_flaky_tests.end()) {
-          delete *it;
-          it = tests.erase(it);
-        }
-        else {
-          ++it;
-        }
-      }
 #else
      fprintf(stderr, "CUDA is not supported in this build");
      return -1;
@ -243,6 +224,37 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
 #endif
    }

+    std::unordered_set<std::string> cuda_flaky_tests = {
+      "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"};
+
+#if (defined (_WIN32) && !defined(_WIN64)) || (defined(__GNUG__) && !defined(__LP64__))
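+    // Minimize memory consumption: run each test case inside the load callback and free it when the callback returns, instead of keeping every loaded test in memory.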
+    LoadTests (data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&stat, &sf, enable_cuda, &cuda_flaky_tests] (ITestCase* l) {
+      std::unique_ptr<ITestCase> test_case_ptr(l);
+      if (enable_cuda && cuda_flaky_tests.find(l->GetTestCaseName()) != cuda_flaky_tests.end()) {
+        return;
+      }
+      TestResultStat per_case_stat;
+      std::vector<ITestCase*> per_case_tests = {l};
+      TestEnv per_case_args(per_case_tests, per_case_stat, sf);
+      RunTests(per_case_args, 1, 1, 1, GetDefaultThreadPool(Env::Default()));
+      stat += per_case_stat;
+    });
+#else
+    std::vector<ITestCase*> tests;
+    LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&tests] (ITestCase* l) { tests.push_back(l); });
+    if (enable_cuda) {
+      for (auto it = tests.begin(); it != tests.end();) {
+        auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
+        if (iter != cuda_flaky_tests.end()) {
+          delete *it;
+          it = tests.erase(it);
+        }
+        else {
+          ++it;
+        }
+      }
+    }
    TestEnv args(tests, stat, sf);
    Status st = RunTests(args, p_models, concurrent_session_runs, static_cast<size_t>(repeat_count),
                         GetDefaultThreadPool(Env::Default()));
@ -250,12 +262,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
      fprintf(stderr, "%s\n", st.ErrorMessage().c_str());
      return -1;
    }
-
-    std::string res = stat.ToString();
-    fwrite(res.c_str(), 1, res.size(), stdout);
    for (ITestCase* l : tests) {
      delete l;
    }
+#endif
+    std::string res = stat.ToString();
+    fwrite(res.c_str(), 1, res.size(), stdout);
  }
  // clang-format off
  std::map<std::string, std::string> broken_tests{
@ -348,24 +360,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
 #endif
  // clang-format on

-#ifdef _WIN32
-  broken_tests["tf_inception_resnet_v2"] = "failed: type mismatch";
-  broken_tests["tf_inception_v4"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v1_101"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v1_152"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v2_101"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v2_152"] = "failed: type mismatch";
+#if defined (_WIN32) && !defined(_WIN64)
  broken_tests["vgg19"] = "failed: bad allocation";
-  broken_tests["tf_nasnet_large"] = "failed: bad allocation";
-  broken_tests["tf_pnasnet_large"] = "failed: bad allocation";
-  broken_tests["zfnet512"] = "failed: bad allocation";
 #endif

-#ifdef __GNUG__
-#ifndef __LP64__
+#if defined(__GNUG__) && !defined(__LP64__)
  broken_tests["nonzero_example"] = "failed: type mismatch";
-  broken_tests["fp16_tiny_yolov2"] = "Need to adjust the per_sample_tolerance: 0.2";
-#endif
 #endif

  int result = 0;
--- a/onnxruntime/test/onnx/runner.cc
+++ b/onnxruntime/test/onnx/runner.cc
@ -242,10 +242,10 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c
  return common::Status::OK();
 }

-std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
-                                  const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
-                                  double default_per_sample_tolerance, double default_relative_per_sample_tolerance) {
-  std::vector<ITestCase*> tests;
+void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
+               const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
+               double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
+               const std::function<void(ITestCase*)>& process_function) {
  std::vector<std::basic_string<PATH_CHAR_TYPE>> paths(input_paths);
  while (!paths.empty()) {
    std::basic_string<PATH_CHAR_TYPE> node_data_root_path = paths.back();
@ -271,11 +271,10 @@ std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_

      ITestCase* l = CreateOnnxTestCase(ToMBString(test_case_name), TestModelInfo::LoadOnnxModel(p.c_str()),
                                        default_per_sample_tolerance, default_relative_per_sample_tolerance);
-      tests.push_back(l);
+      process_function(l);
      return true;
    });
  }
-  return tests;
 }

 SeqTestRunner::SeqTestRunner(OrtSession* session1,
--- a/onnxruntime/test/onnx/runner.h
+++ b/onnxruntime/test/onnx/runner.h
@ -128,9 +128,11 @@ struct DataTask {
  const size_t task_id;
 };

-std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
-                                  const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
-                                  double default_per_sample_tolerance, double default_relative_per_sample_tolerance);
+void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
+               const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
+               double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
+               const std::function<void(ITestCase*)>& process_function);  // process_function is called once per created test case and receives the raw pointer; LoadTests does not free it, so the callback (or its caller) must delete it.
+
 //Do not run this function in the thread pool passed in
 ::onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool);
 EXECUTE_RESULT StatusCodeToExecuteResult(int input);