From 60d71d63b58e5aaf7ec7c12567be5dcf015d254b Mon Sep 17 00:00:00 2001
From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com>
Date: Tue, 16 Apr 2019 15:47:52 -0700
Subject: [PATCH] Rashuai/onnx test reduce mem (#790)

* define new test load function

* remove bak file

* add stat operator

* add arguments

* fix comments

* try enable fp16_tiny_yolov2 on linux

* fix compile err

* try enable fp16_tiny_yolov2
---
 onnxruntime/test/onnx/TestResultStat.h | 26 ++++++++++
 onnxruntime/test/onnx/main.cc          | 72 +++++++++++++-------------
 onnxruntime/test/onnx/runner.cc        | 11 ++--
 onnxruntime/test/onnx/runner.h         |  8 +--
 4 files changed, 72 insertions(+), 45 deletions(-)

diff --git a/onnxruntime/test/onnx/TestResultStat.h b/onnxruntime/test/onnx/TestResultStat.h
index 12c4174c25..e289be3b4b 100644
--- a/onnxruntime/test/onnx/TestResultStat.h
+++ b/onnxruntime/test/onnx/TestResultStat.h
@@ -45,6 +45,32 @@ class TestResultStat {
 
   std::string ToString();
 
+  // Merges |result| into this object: every counter is summed and each name
+  // set of |result| is merged into the matching set of this object.
+  TestResultStat& operator += (const TestResultStat& result) {
+    total_test_case_count += result.total_test_case_count;
+    total_model_count     += result.total_model_count;
+    succeeded             += result.succeeded;
+    not_implemented       += result.not_implemented;
+    load_model_failed     += result.load_model_failed;
+    throwed_exception     += result.throwed_exception;
+    result_differs        += result.result_differs;
+    skipped               += result.skipped;
+    invalid_graph         += result.invalid_graph;
+
+    for(const auto& s:result.not_implemented_kernels) {
+        AddNotImplementedKernels(s);
+    }
+    for(const auto& s:result.failed_kernels) {
+        AddFailedKernels(s);  // keep failed kernels out of the not-implemented set
+    }
+    for(const auto& s:result.failed_test_cases) {
+        AddFailedTest(s);  // failed test names belong in failed_test_cases
+    }
+
+    return *this;
+  }
+
  private:
   onnxruntime::OrtMutex m_;
   std::unordered_set<std::string> not_implemented_kernels;
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 4152c2aa8f..250c72318c 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -181,8 +181,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
     double per_sample_tolerance = 1e-3;
     // when cuda is enabled, set it to a larger value for resolving random MNIST test failure
     double relative_per_sample_tolerance = enable_cuda ? 0.017 : 1e-3;
-    std::vector<ITestCase*> tests =
-        LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance);
     SessionOptionsWrapper sf(env);
     if (enable_cpu_mem_arena)
       sf.EnableCpuMemArena();
@@ -204,23 +202,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
     if (enable_cuda) {
 #ifdef USE_CUDA
       ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
-      // Filter out some flaky tests from cuda test runs. Those tests
-      // caused random segfault in CUDA 9.1. 
-      // TODO: remove this list once we fully moved to CUDA10
-      // clang-format off
-      std::unordered_set<std::string> cuda_flaky_tests = {
-        "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"
-      };
-      for (auto it = tests.begin(); it != tests.end();) {
-        auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
-        if (iter != cuda_flaky_tests.end()) {
-          delete *it;
-          it = tests.erase(it);
-        }
-        else {
-          ++it;
-        }
-      }
 #else
       fprintf(stderr, "CUDA is not supported in this build");
       return -1;
@@ -243,6 +224,37 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
 #endif
     }
 
+    std::unordered_set<std::string> cuda_flaky_tests = {
+      "fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"};
+
+#if (defined (_WIN32) && !defined(_WIN64)) || (defined(__GNUG__) && !defined(__LP64__))
+    // Minimize memory consumption: run each test case as soon as it is loaded, then free it
+    LoadTests (data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&stat, &sf, enable_cuda, &cuda_flaky_tests] (ITestCase* l) {
+      std::unique_ptr<ITestCase> test_case_ptr(l);
+      if (enable_cuda && cuda_flaky_tests.find(l->GetTestCaseName()) != cuda_flaky_tests.end()) {
+        return;
+      }
+      TestResultStat per_case_stat;
+      std::vector<ITestCase*> per_case_tests = {l};
+      TestEnv per_case_args(per_case_tests, per_case_stat, sf);
+      RunTests(per_case_args, 1, 1, 1, GetDefaultThreadPool(Env::Default()));
+      stat += per_case_stat;
+    });
+#else
+    std::vector<ITestCase*> tests;
+    LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&tests] (ITestCase* l) { tests.push_back(l); });
+    if (enable_cuda) {
+      for (auto it = tests.begin(); it != tests.end();) {
+        auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
+        if (iter != cuda_flaky_tests.end()) {
+          delete *it;
+          it = tests.erase(it);
+        }
+        else {
+          ++it;
+        }
+      }
+    }
     TestEnv args(tests, stat, sf);
     Status st = RunTests(args, p_models, concurrent_session_runs, static_cast<size_t>(repeat_count),
                          GetDefaultThreadPool(Env::Default()));
@@ -250,12 +262,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
       fprintf(stderr, "%s\n", st.ErrorMessage().c_str());
       return -1;
     }
-
-    std::string res = stat.ToString();
-    fwrite(res.c_str(), 1, res.size(), stdout);
     for (ITestCase* l : tests) {
       delete l;
     }
+#endif
+    std::string res = stat.ToString();
+    fwrite(res.c_str(), 1, res.size(), stdout);
   }
   // clang-format off
   std::map<std::string, std::string> broken_tests{
@@ -348,24 +360,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
 #endif
   // clang-format on
 
-#ifdef _WIN32
-  broken_tests["tf_inception_resnet_v2"] = "failed: type mismatch";
-  broken_tests["tf_inception_v4"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v1_101"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v1_152"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v2_101"] = "failed: type mismatch";
-  broken_tests["tf_resnet_v2_152"] = "failed: type mismatch";
+#if defined (_WIN32) && !defined(_WIN64)
   broken_tests["vgg19"] = "failed: bad allocation";
-  broken_tests["tf_nasnet_large"] = "failed: bad allocation";
-  broken_tests["tf_pnasnet_large"] = "failed: bad allocation";
-  broken_tests["zfnet512"] = "failed: bad allocation";
 #endif
 
-#ifdef __GNUG__
-#ifndef __LP64__
+#if defined(__GNUG__) && !defined(__LP64__)
   broken_tests["nonzero_example"] = "failed: type mismatch";
-  broken_tests["fp16_tiny_yolov2"] = "Need to adjust the per_sample_tolerance: 0.2";
-#endif
 #endif
 
   int result = 0;
diff --git a/onnxruntime/test/onnx/runner.cc b/onnxruntime/test/onnx/runner.cc
index b80f01793c..881385fd4f 100644
--- a/onnxruntime/test/onnx/runner.cc
+++ b/onnxruntime/test/onnx/runner.cc
@@ -242,10 +242,10 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c
   return common::Status::OK();
 }
 
-std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
-                                  const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
-                                  double default_per_sample_tolerance, double default_relative_per_sample_tolerance) {
-  std::vector<ITestCase*> tests;
+void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
+               const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
+               double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
+               const std::function<void(ITestCase*)>& process_function) {
   std::vector<std::basic_string<PATH_CHAR_TYPE>> paths(input_paths);
   while (!paths.empty()) {
     std::basic_string<PATH_CHAR_TYPE> node_data_root_path = paths.back();
@@ -271,11 +271,10 @@ std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_
 
       ITestCase* l = CreateOnnxTestCase(ToMBString(test_case_name), TestModelInfo::LoadOnnxModel(p.c_str()),
                                         default_per_sample_tolerance, default_relative_per_sample_tolerance);
-      tests.push_back(l);
+      process_function(l);
       return true;
     });
   }
-  return tests;
 }
 
 SeqTestRunner::SeqTestRunner(OrtSession* session1,
diff --git a/onnxruntime/test/onnx/runner.h b/onnxruntime/test/onnx/runner.h
index 4516667b14..a7821b5e9f 100644
--- a/onnxruntime/test/onnx/runner.h
+++ b/onnxruntime/test/onnx/runner.h
@@ -128,9 +128,11 @@ struct DataTask {
   const size_t task_id;
 };
 
-std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
-                                  const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
-                                  double default_per_sample_tolerance, double default_relative_per_sample_tolerance);
+void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
+               const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
+               double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
+               const std::function<void(ITestCase*)>& process_function);
+
 //Do not run this function in the thread pool passed in
 ::onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool);
 EXECUTE_RESULT StatusCodeToExecuteResult(int input);