Rashuai/onnx test reduce mem (#790)

* define new test load function

* remove bak file

* add stat operator

* add arguments

* fix comments

* try enable fp16_tiny_yolov2 on linux

* fix compile err

* try enable fp16_tiny_yolov2
This commit is contained in:
RandySheriffH 2019-04-16 15:47:52 -07:00 committed by GitHub
parent 3a8b9a4918
commit 60d71d63b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 45 deletions

View file

@ -45,6 +45,32 @@ class TestResultStat {
std::string ToString();
// Accumulates the statistics gathered in `result` into this object.
//
// Every counter is added to the counter of the same name, and every entry of
// the three name collections is merged into the collection of the same name
// (not-implemented kernels, failed kernels, failed test cases).
//
// @param result  statistics of a finished (sub-)run; it is only read.
// @return        *this, so calls can be chained.
TestResultStat& operator += (const TestResultStat& result) {
  total_test_case_count += result.total_test_case_count;
  total_model_count += result.total_model_count;
  succeeded += result.succeeded;
  not_implemented += result.not_implemented;
  load_model_failed += result.load_model_failed;
  throwed_exception += result.throwed_exception;
  result_differs += result.result_differs;
  skipped += result.skipped;
  invalid_graph += result.invalid_graph;

  for (const auto& kernel : result.not_implemented_kernels) {
    AddNotImplementedKernels(kernel);
  }

  // Failed kernels and failed test cases must land in their own collections.
  // Routing them through AddNotImplementedKernels reported every failure as a
  // "not implemented kernel" and left the failure lists of the merged result
  // empty.
  {
    // NOTE(review): presumably m_ is the mutex that guards these collections
    // (and AddNotImplementedKernels releases it before returning) - confirm.
    std::lock_guard<onnxruntime::OrtMutex> guard(m_);
    for (const auto& kernel : result.failed_kernels) {
      failed_kernels.insert(kernel);
    }
    for (const auto& test_case : result.failed_test_cases) {
      failed_test_cases.insert(test_case);
    }
  }
  return *this;
}
private:
onnxruntime::OrtMutex m_;
std::unordered_set<std::string> not_implemented_kernels;

View file

@ -181,8 +181,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
double per_sample_tolerance = 1e-3;
// when cuda is enabled, set it to a larger value for resolving random MNIST test failure
double relative_per_sample_tolerance = enable_cuda ? 0.017 : 1e-3;
std::vector<ITestCase*> tests =
LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance);
SessionOptionsWrapper sf(env);
if (enable_cpu_mem_arena)
sf.EnableCpuMemArena();
@ -204,23 +202,6 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
if (enable_cuda) {
#ifdef USE_CUDA
ORT_THROW_ON_ERROR(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, 0));
// Filter out some flaky tests from cuda test runs. Those tests
// caused random segfault in CUDA 9.1.
// TODO: remove this list once we fully moved to CUDA10
// clang-format off
std::unordered_set<std::string> cuda_flaky_tests = {
"fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"
};
for (auto it = tests.begin(); it != tests.end();) {
auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
if (iter != cuda_flaky_tests.end()) {
delete *it;
it = tests.erase(it);
}
else {
++it;
}
}
#else
fprintf(stderr, "CUDA is not supported in this build");
return -1;
@ -243,6 +224,37 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
#endif
}
std::unordered_set<std::string> cuda_flaky_tests = {
"fp16_inception_v1", "fp16_shufflenet", "fp16_tiny_yolov2"};
#if (defined (_WIN32) && !defined(_WIN64)) || (defined(__GNUG__) && !defined(__LP64__))
//Minimize mem consumption
LoadTests (data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&stat, &sf, enable_cuda, &cuda_flaky_tests] (ITestCase* l) {
std::unique_ptr<ITestCase> test_case_ptr(l);
if (enable_cuda && cuda_flaky_tests.find(l->GetTestCaseName()) != cuda_flaky_tests.end()) {
return;
}
TestResultStat per_case_stat;
std::vector<ITestCase*> per_case_tests = {l};
TestEnv per_case_args(per_case_tests, per_case_stat, sf);
RunTests(per_case_args, 1, 1, 1, GetDefaultThreadPool(Env::Default()));
stat += per_case_stat;
});
#else
std::vector<ITestCase*> tests;
LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, [&tests] (ITestCase* l) { tests.push_back(l); });
if (enable_cuda) {
for (auto it = tests.begin(); it != tests.end();) {
auto iter = cuda_flaky_tests.find((*it)->GetTestCaseName());
if (iter != cuda_flaky_tests.end()) {
delete *it;
it = tests.erase(it);
}
else {
++it;
}
}
}
TestEnv args(tests, stat, sf);
Status st = RunTests(args, p_models, concurrent_session_runs, static_cast<size_t>(repeat_count),
GetDefaultThreadPool(Env::Default()));
@ -250,12 +262,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
fprintf(stderr, "%s\n", st.ErrorMessage().c_str());
return -1;
}
std::string res = stat.ToString();
fwrite(res.c_str(), 1, res.size(), stdout);
for (ITestCase* l : tests) {
delete l;
}
#endif
std::string res = stat.ToString();
fwrite(res.c_str(), 1, res.size(), stdout);
}
// clang-format off
std::map<std::string, std::string> broken_tests{
@ -348,24 +360,12 @@ int real_main(int argc, char* argv[], OrtEnv** p_env) {
#endif
// clang-format on
#ifdef _WIN32
broken_tests["tf_inception_resnet_v2"] = "failed: type mismatch";
broken_tests["tf_inception_v4"] = "failed: type mismatch";
broken_tests["tf_resnet_v1_101"] = "failed: type mismatch";
broken_tests["tf_resnet_v1_152"] = "failed: type mismatch";
broken_tests["tf_resnet_v2_101"] = "failed: type mismatch";
broken_tests["tf_resnet_v2_152"] = "failed: type mismatch";
#if defined (_WIN32) && !defined(_WIN64)
broken_tests["vgg19"] = "failed: bad allocation";
broken_tests["tf_nasnet_large"] = "failed: bad allocation";
broken_tests["tf_pnasnet_large"] = "failed: bad allocation";
broken_tests["zfnet512"] = "failed: bad allocation";
#endif
#ifdef __GNUG__
#ifndef __LP64__
#if defined(__GNUG__) && !defined(__LP64__)
broken_tests["nonzero_example"] = "failed: type mismatch";
broken_tests["fp16_tiny_yolov2"] = "Need to adjust the per_sample_tolerance: 0.2";
#endif
#endif
int result = 0;

View file

@ -242,10 +242,10 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c
return common::Status::OK();
}
std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
double default_per_sample_tolerance, double default_relative_per_sample_tolerance) {
std::vector<ITestCase*> tests;
void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
const std::function<void(ITestCase*)>& process_function) {
std::vector<std::basic_string<PATH_CHAR_TYPE>> paths(input_paths);
while (!paths.empty()) {
std::basic_string<PATH_CHAR_TYPE> node_data_root_path = paths.back();
@ -271,11 +271,10 @@ std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_
ITestCase* l = CreateOnnxTestCase(ToMBString(test_case_name), TestModelInfo::LoadOnnxModel(p.c_str()),
default_per_sample_tolerance, default_relative_per_sample_tolerance);
tests.push_back(l);
process_function(l);
return true;
});
}
return tests;
}
SeqTestRunner::SeqTestRunner(OrtSession* session1,

View file

@ -128,9 +128,11 @@ struct DataTask {
const size_t task_id;
};
std::vector<ITestCase*> LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
double default_per_sample_tolerance, double default_relative_per_sample_tolerance);
// Loads test cases starting from `input_paths` and passes each one to
// `process_function` as soon as it has been created; nothing is returned or
// collected by LoadTests itself.
//
// input_paths                            root paths to search for test data.
// whitelisted_test_cases                 presumably restricts loading to the named
//                                        test cases - TODO confirm against the definition.
// default_per_sample_tolerance,
// default_relative_per_sample_tolerance  tolerances handed to every test case that is created.
// process_function                       invoked once per created ITestCase with a raw pointer.
//                                        NOTE(review): the visible callers take ownership of that
//                                        pointer (unique_ptr / delete), so a callback that neither
//                                        stores nor deletes it appears to leak the test case.
void LoadTests(const std::vector<std::basic_string<PATH_CHAR_TYPE>>& input_paths,
const std::vector<std::basic_string<PATH_CHAR_TYPE>>& whitelisted_test_cases,
double default_per_sample_tolerance, double default_relative_per_sample_tolerance,
const std::function<void(ITestCase*)>& process_function);
//Do not run this function in the thread pool passed in
::onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool);
EXECUTE_RESULT StatusCodeToExecuteResult(int input);