From 16cef90e29f2dac3a3612994eb33cb8de1827671 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Thu, 4 Jun 2020 07:01:39 +1000 Subject: [PATCH] General enhancements/cleanups to test exes (#4109) * General enhancements/cleanups to test exes - Support running onnxruntime_perf_test with no output file - if you're profiling, the output file is often unused and can be very large - Allow failure to override early success if doing multiple runs of a test using onnx_test_runner - e.g. if the second run fails that's more important as a final status - Clarify ownership semantics - Cleanup naming, line lengths, usage of references for required parameters etc. --- .../test/onnx/FixedCountFinishCallback.h | 8 +- onnxruntime/test/onnx/TestCase.cc | 86 ++++---- onnxruntime/test/onnx/TestCase.h | 21 +- onnxruntime/test/onnx/TestCaseResult.cc | 8 +- onnxruntime/test/onnx/TestCaseResult.h | 20 +- onnxruntime/test/onnx/main.cc | 19 +- onnxruntime/test/onnx/runner.cc | 203 ++++++++++-------- onnxruntime/test/onnx/runner.h | 31 ++- onnxruntime/test/onnx/sync_api.cc | 4 +- onnxruntime/test/onnx/testenv.cc | 12 +- onnxruntime/test/onnx/testenv.h | 2 +- onnxruntime/test/perftest/TFModelInfo.cc | 6 +- onnxruntime/test/perftest/TFModelInfo.h | 2 +- .../test/perftest/command_args_parser.cc | 17 +- onnxruntime/test/perftest/main.cc | 4 + onnxruntime/test/perftest/ort_test_session.cc | 43 ++-- onnxruntime/test/perftest/ort_test_session.h | 2 +- .../test/perftest/performance_runner.cc | 93 ++++++-- .../test/perftest/performance_runner.h | 48 +---- onnxruntime/test/util/compare_ortvalue.cc | 3 +- 20 files changed, 357 insertions(+), 275 deletions(-) diff --git a/onnxruntime/test/onnx/FixedCountFinishCallback.h b/onnxruntime/test/onnx/FixedCountFinishCallback.h index fe917d73f0..cdcbf86ca6 100644 --- a/onnxruntime/test/onnx/FixedCountFinishCallback.h +++ b/onnxruntime/test/onnx/FixedCountFinishCallback.h @@ -32,7 +32,7 @@ class FixedCountFinishCallbackImpl { if (finish_event_) 
OrtCloseEvent(finish_event_); } - ::onnxruntime::common::Status fail(ORT_CALLBACK_INSTANCE pci) { + ::onnxruntime::common::Status Fail(ORT_CALLBACK_INSTANCE pci) { { std::lock_guard g(m_); failed = true; @@ -41,7 +41,7 @@ class FixedCountFinishCallbackImpl { return OnnxRuntimeSetEventWhenCallbackReturns(pci, finish_event_); } - ::onnxruntime::common::Status onFinished(size_t task_index, std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { + ::onnxruntime::common::Status OnFinished(size_t task_index, std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { int v; { std::lock_guard g(m_); @@ -54,12 +54,12 @@ class FixedCountFinishCallbackImpl { return ::onnxruntime::common::Status::OK(); } - bool shouldStop() { + bool ShouldStop() { std::lock_guard g(m_); return failed; } //this function can only be invoked once - bool wait() { + bool Wait() { ORT_ENFORCE(WaitAndCloseEvent(finish_event_).IsOK()); { std::lock_guard g(m_); diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 1f93116d68..81f881f8ef 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -276,7 +276,7 @@ OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, onnxruntime::te return temp_value.release(); } -void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* modelinfo, +void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo& modelinfo, std::unordered_map& name_data_map, onnxruntime::test::HeapBuffer& b, std::ostringstream& oss) { google::protobuf::io::FileInputStream f(test_data_pb_fd, protobuf_block_size_in_bytes); @@ -332,7 +332,7 @@ void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* model std::string value_name = data.name(); if (value_name.empty()) { const size_t c = name_data_map.size(); - value_name = is_input ? modelinfo->GetInputName(c) : modelinfo->GetOutputName(c); + value_name = is_input ? 
modelinfo.GetInputName(c) : modelinfo.GetOutputName(c); } auto pv = name_data_map.insert(std::make_pair(value_name, gvalue.release())); @@ -357,8 +357,8 @@ void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* model } // namespace -TestModelInfo* TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url) { - return new OnnxModelInfo(model_url); +std::unique_ptr TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url) { + return std::unique_ptr(new OnnxModelInfo(model_url)); } /** @@ -372,76 +372,73 @@ TestModelInfo* TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url class OnnxTestCase : public ITestCase { private: std::string test_case_name_; - std::vector debuginfo_strings; - onnxruntime::OrtMutex m_; + mutable std::vector debuginfo_strings_; + mutable onnxruntime::OrtMutex m_; std::vector> test_data_dirs_; - std::string GetDatasetDebugInfoString(size_t dataset_id) override { + std::string GetDatasetDebugInfoString(size_t dataset_id) const override { std::lock_guard l(m_); - if (dataset_id < debuginfo_strings.size()) { - return debuginfo_strings[dataset_id]; + if (dataset_id < debuginfo_strings_.size()) { + return debuginfo_strings_[dataset_id]; } // return empty string return std::string(); } - void ConvertTestData(const std::vector& test_data_pbs, onnxruntime::test::HeapBuffer& b, - bool is_input, - std::unordered_map& out); + void ConvertTestData(const std::vector& test_data_pbs, + onnxruntime::test::HeapBuffer& b, bool is_input, + std::unordered_map& out) const; std::once_flag model_parsed_; std::once_flag config_parsed_; double per_sample_tolerance_; double relative_per_sample_tolerance_; bool post_processing_; - TestModelInfo* model_info_; + std::unique_ptr model_info_; ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTestCase); public: - OnnxTestCase(const std::string& test_case_name, _In_ TestModelInfo* model, double default_per_sample_tolerance, - double default_relative_per_sample_tolerance); - 
~OnnxTestCase() override { delete model_info_; } - Status GetPerSampleTolerance(double* value) override; - Status GetRelativePerSampleTolerance(double* value) override; - Status GetPostProcessing(bool* value) override; + OnnxTestCase(const std::string& test_case_name, _In_ std::unique_ptr model, + double default_per_sample_tolerance, double default_relative_per_sample_tolerance); + Status GetPerSampleTolerance(double* value) const override; + Status GetRelativePerSampleTolerance(double* value) const override; + Status GetPostProcessing(bool* value) const override; const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const override { return model_info_->GetOutputInfoFromModel(i); } - size_t GetDataCount() const override { - return test_data_dirs_.size(); - } + size_t GetDataCount() const override { return test_data_dirs_.size(); } const std::string& GetNodeName() const override { return model_info_->GetNodeName(); } - const PATH_CHAR_TYPE* GetModelUrl() const override { return model_info_->GetModelUrl(); } - const std::string& GetTestCaseName() const override { - return test_case_name_; - } - std::string GetTestCaseVersion() const override { - return model_info_->GetModelVersion(); - } + const std::string& GetTestCaseName() const override { return test_case_name_; } + std::string GetTestCaseVersion() const override { return model_info_->GetModelVersion(); } + void LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::unordered_map&, - bool is_input) override; + bool is_input) const override; }; -ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance) { - return new OnnxTestCase(test_case_name, model, default_per_sample_tolerance, default_relative_per_sample_tolerance); +std::unique_ptr CreateOnnxTestCase(const std::string& test_case_name, + std::unique_ptr model, + double default_per_sample_tolerance, + double 
default_relative_per_sample_tolerance) { + return std::unique_ptr(new OnnxTestCase(test_case_name, std::move(model), + default_per_sample_tolerance, + default_relative_per_sample_tolerance)); } -Status OnnxTestCase::GetPerSampleTolerance(double* value) { +Status OnnxTestCase::GetPerSampleTolerance(double* value) const { *value = per_sample_tolerance_; return Status::OK(); } -Status OnnxTestCase::GetRelativePerSampleTolerance(double* value) { +Status OnnxTestCase::GetRelativePerSampleTolerance(double* value) const { *value = relative_per_sample_tolerance_; return Status::OK(); } -Status OnnxTestCase::GetPostProcessing(bool* value) { +Status OnnxTestCase::GetPostProcessing(bool* value) const { *value = post_processing_; return Status::OK(); } @@ -516,7 +513,7 @@ static void LoadTensors(const std::vector& pb_files, void OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::unordered_map& name_data_map, - bool is_input) { + bool is_input) const { if (id >= test_data_dirs_.size()) { ORT_THROW("index out of bound"); } @@ -529,10 +526,10 @@ void OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::ostringstream oss; { std::lock_guard l(m_); - oss << debuginfo_strings[id]; + oss << debuginfo_strings_[id]; } try { - LoopDataFile(test_data_pb_fd, is_input, model_info_, name_data_map, b, oss); + LoopDataFile(test_data_pb_fd, is_input, *model_info_, name_data_map, b, oss); } catch (std::exception& ex) { std::ostringstream oss2; oss2 << "parse data file \"" << ToMBString(test_data_pb) << "\" failed:" << ex.what(); @@ -540,7 +537,7 @@ void OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, } { std::lock_guard l(m_); - debuginfo_strings[id] = oss.str(); + debuginfo_strings_[id] = oss.str(); } return; } @@ -561,6 +558,7 @@ void OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, } return true; }); + SortTensorFileNames(test_data_pb_files); std::vector test_data_pbs; @@ -570,7 +568,7 @@ void 
OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, void OnnxTestCase::ConvertTestData(const std::vector& test_data_pbs, onnxruntime::test::HeapBuffer& b, - bool is_input, std::unordered_map& out) { + bool is_input, std::unordered_map& out) const { bool has_valid_names = true; std::vector var_names(test_data_pbs.size()); for (size_t input_index = 0; input_index != test_data_pbs.size(); ++input_index) { @@ -615,9 +613,9 @@ void OnnxTestCase::ConvertTestData(const std::vector model, double default_per_sample_tolerance, double default_relative_per_sample_tolerance) - : test_case_name_(test_case_name), model_info_(model) { + : test_case_name_(test_case_name), model_info_(std::move(model)) { std::basic_string test_case_dir = model_info_->GetDir(); // parse config @@ -649,7 +647,7 @@ OnnxTestCase::OnnxTestCase(const std::string& test_case_name, _In_ TestModelInfo if (f_type == OrtFileType::TYPE_DIR) { std::basic_string p = ConcatPathComponent(test_case_dir, filename); test_data_dirs_.push_back(p); - debuginfo_strings.push_back(ToMBString(p)); + debuginfo_strings_.push_back(ToMBString(p)); } return true; }); diff --git a/onnxruntime/test/onnx/TestCase.h b/onnxruntime/test/onnx/TestCase.h index f6f61248e0..dc3915f503 100644 --- a/onnxruntime/test/onnx/TestCase.h +++ b/onnxruntime/test/onnx/TestCase.h @@ -19,8 +19,9 @@ class ValueInfoProto; //One test case can contain multiple test data(input/output pairs) class ITestCase { public: - virtual void LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::unordered_map& name_data_map, - bool is_input) = 0; + virtual void LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, + std::unordered_map& name_data_map, + bool is_input) const = 0; virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; virtual const std::string& GetNodeName() const = 0; virtual const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const = 0; @@ -28,13 +29,13 @@ class ITestCase { virtual const std::string& 
GetTestCaseName() const = 0; virtual std::string GetTestCaseVersion() const = 0; //a string to help identify the dataset - virtual std::string GetDatasetDebugInfoString(size_t dataset_id) = 0; + virtual std::string GetDatasetDebugInfoString(size_t dataset_id) const = 0; //The number of input/output pairs virtual size_t GetDataCount() const = 0; virtual ~ITestCase() = default; - virtual ::onnxruntime::common::Status GetPerSampleTolerance(double* value) = 0; - virtual ::onnxruntime::common::Status GetRelativePerSampleTolerance(double* value) = 0; - virtual ::onnxruntime::common::Status GetPostProcessing(bool* value) = 0; + virtual ::onnxruntime::common::Status GetPerSampleTolerance(double* value) const = 0; + virtual ::onnxruntime::common::Status GetRelativePerSampleTolerance(double* value) const = 0; + virtual ::onnxruntime::common::Status GetPostProcessing(bool* value) const = 0; }; class TestModelInfo { @@ -57,9 +58,11 @@ class TestModelInfo { virtual std::string GetModelVersion() const { return ""; } virtual ~TestModelInfo() = default; - static TestModelInfo* LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url); + static std::unique_ptr LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url); static const std::string unknown_version; }; -ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance); +std::unique_ptr CreateOnnxTestCase(const std::string& test_case_name, + std::unique_ptr model, + double default_per_sample_tolerance, + double default_relative_per_sample_tolerance); diff --git a/onnxruntime/test/onnx/TestCaseResult.cc b/onnxruntime/test/onnx/TestCaseResult.cc index ecb054028e..85be9e7fdd 100644 --- a/onnxruntime/test/onnx/TestCaseResult.cc +++ b/onnxruntime/test/onnx/TestCaseResult.cc @@ -4,5 +4,11 @@ #include "TestCaseResult.h" void TestCaseResult::SetResult(size_t task_id, EXECUTE_RESULT r) { - excution_result_[task_id] = r; + std::lock_guard 
guard(result_mutex_); + if (execution_result_[task_id] == EXECUTE_RESULT::NOT_SET) { + execution_result_[task_id] = r; + } else if (r != EXECUTE_RESULT::SUCCESS && execution_result_[task_id] == EXECUTE_RESULT::SUCCESS) { + // store first failure + execution_result_[task_id] = r; + } } diff --git a/onnxruntime/test/onnx/TestCaseResult.h b/onnxruntime/test/onnx/TestCaseResult.h index ce967facf1..702eb158df 100644 --- a/onnxruntime/test/onnx/TestCaseResult.h +++ b/onnxruntime/test/onnx/TestCaseResult.h @@ -6,10 +6,12 @@ #include #include #include +#include //result of a single test run: 1 model with 1 test dataset enum class EXECUTE_RESULT { - SUCCESS = 0, + NOT_SET = 0, + SUCCESS = 1, UNKNOWN_ERROR = -1, WITH_EXCEPTION = -2, RESULT_DIFFERS = -3, @@ -25,23 +27,24 @@ enum class EXECUTE_RESULT { class TestCaseResult { public: - TestCaseResult(size_t count, EXECUTE_RESULT result, const std::string& node_name1) : node_name(node_name1), excution_result_(count, result) { - ::onnxruntime::SetTimeSpecToZero(&spent_time_); + TestCaseResult(size_t count, EXECUTE_RESULT result, const std::string& node_name1) + : node_name(node_name1), execution_result_(count, result) { + onnxruntime::SetTimeSpecToZero(&spent_time_); } void SetResult(size_t task_id, EXECUTE_RESULT r); const std::vector& GetExcutionResult() const { - return excution_result_; + return execution_result_; } //Time spent in Session::Run. It only make sense when SeqTestRunner was used - ::onnxruntime::TIME_SPEC GetSpentTime() const { + onnxruntime::TIME_SPEC GetSpentTime() const { return spent_time_; } //Time spent in Session::Run. 
It only make sense when SeqTestRunner was used - void SetSpentTime(const ::onnxruntime::TIME_SPEC& input) const { + void SetSpentTime(const onnxruntime::TIME_SPEC& input) const { memcpy((void*)&spent_time_, &input, sizeof(input)); } @@ -49,6 +52,7 @@ class TestCaseResult { const std::string node_name; private: - ::onnxruntime::TIME_SPEC spent_time_; - std::vector excution_result_; + onnxruntime::TIME_SPEC spent_time_; + std::vector execution_result_; + std::mutex result_mutex_; }; diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 18368d4511..21bc2f5513 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -85,7 +85,7 @@ int real_main(int argc, wchar_t* argv[], Ort::Env& env) { int real_main(int argc, char* argv[], Ort::Env& env) { #endif // if this var is not empty, only run the tests with name in this list - std::vector > whitelisted_test_cases; + std::vector> whitelisted_test_cases; int concurrent_session_runs = GetNumCpuCores(); bool enable_cpu_mem_arena = true; ExecutionMode execution_mode = ExecutionMode::ORT_SEQUENTIAL; @@ -250,7 +250,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { return -1; } - std::vector > data_dirs; + std::vector> data_dirs; TestResultStat stat; for (int i = 0; i != argc; ++i) { @@ -437,7 +437,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { ORT_TSTR("tf_mobilenet_v2_1.0_224"), ORT_TSTR("tf_mobilenet_v2_1.4_224"), ORT_TSTR("tf_nasnet_large"), ORT_TSTR("tf_pnasnet_large"), ORT_TSTR("tf_resnet_v1_50"), ORT_TSTR("tf_resnet_v1_101"), ORT_TSTR("tf_resnet_v1_101"), ORT_TSTR("tf_resnet_v2_101"), ORT_TSTR("tf_resnet_v2_152"), ORT_TSTR("batchnorm_example_training_mode"), ORT_TSTR("batchnorm_epsilon_training_mode")}; - std::unordered_set > all_disabled_tests(std::begin(immutable_broken_tests), std::end(immutable_broken_tests)); + std::unordered_set> all_disabled_tests(std::begin(immutable_broken_tests), std::end(immutable_broken_tests)); if (enable_cuda) { 
all_disabled_tests.insert(std::begin(cuda_flaky_tests), std::end(cuda_flaky_tests)); } @@ -455,9 +455,15 @@ int real_main(int argc, char* argv[], Ort::Env& env) { all_disabled_tests.insert(std::begin(x86_disabled_tests), std::end(x86_disabled_tests)); #endif + std::vector> owned_tests; std::vector tests; - LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, all_disabled_tests, - [&tests](ITestCase* l) { tests.push_back(l); }); + + LoadTests(data_dirs, whitelisted_test_cases, per_sample_tolerance, relative_per_sample_tolerance, + all_disabled_tests, + [&owned_tests, &tests](std::unique_ptr l) { + tests.push_back(l.get()); + owned_tests.push_back(std::move(l)); + }); TestEnv args(tests, stat, env, sf); Status st = RunTests(args, p_models, concurrent_session_runs, static_cast(repeat_count), @@ -466,9 +472,6 @@ int real_main(int argc, char* argv[], Ort::Env& env) { fprintf(stderr, "%s\n", st.ErrorMessage().c_str()); return -1; } - for (ITestCase* l : tests) { - delete l; - } std::string res = stat.ToString(); fwrite(res.c_str(), 1, res.size(), stdout); } diff --git a/onnxruntime/test/onnx/runner.cc b/onnxruntime/test/onnx/runner.cc index 3dbcf26186..87f1c3797b 100644 --- a/onnxruntime/test/onnx/runner.cc +++ b/onnxruntime/test/onnx/runner.cc @@ -31,27 +31,26 @@ void ORT_CALLBACK RunTestCase(ORT_CALLBACK_INSTANCE pci, void* context, ORT_WORK OnnxRuntimeCloseThreadpoolWork(work); assert(context != nullptr); TestCaseTask* task(static_cast(context)); - ITestCase* info = task->env.tests[task->task_id]; + ITestCase& info = *task->env.tests[task->task_id]; std::shared_ptr ret; try { - RunSingleTestCase(info, task->env.env, task->env.sf, task->concurrent_runs, task->repeat_count, task->pool, pci, [task](std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { - return OnTestCaseFinished(pci, task, result); - }); - return; + RunSingleTestCase(info, task->env.env, task->env.sf, task->concurrent_runs, task->repeat_count, task->pool, pci, 
+ [task](std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { + return OnTestCaseFinished(pci, task, result); + }); } catch (std::exception& ex) { - LOGF_DEFAULT(ERROR, "Test %s failed:%s", info->GetTestCaseName().c_str(), ex.what()); - std::string node_name = info->GetNodeName(); - ret = std::make_shared(info->GetDataCount(), EXECUTE_RESULT::WITH_EXCEPTION, node_name); - } - auto status = OnTestCaseFinished(pci, task, ret); - if (!status.IsOK()) { - LOGF_DEFAULT(ERROR, "FATAL ERROR"); - abort(); + LOGF_DEFAULT(ERROR, "Test %s failed:%s", info.GetTestCaseName().c_str(), ex.what()); + + ret = std::make_shared(info.GetDataCount(), EXECUTE_RESULT::WITH_EXCEPTION, info.GetNodeName()); + auto status = OnTestCaseFinished(pci, task, ret); + if (!status.IsOK()) { + ORT_THROW("OnTestCaseFinished failed: ", status.ErrorMessage()); + } } } void PTestRunner::Start(ORT_CALLBACK_INSTANCE, size_t concurrent_runs) { - concurrent_runs = std::min(std::max(1, concurrent_runs), c_->GetDataCount()); + concurrent_runs = std::min(std::max(1, concurrent_runs), c_.GetDataCount()); next_test_to_run = 0; for (size_t i = 0; i != concurrent_runs; ++i) { if (!ScheduleNew()) { @@ -62,7 +61,7 @@ void PTestRunner::Start(ORT_CALLBACK_INSTANCE, size_t concurrent_runs) { bool PTestRunner::ScheduleNew() { size_t next_test = next_test_to_run++; - if (next_test >= c_->GetDataCount()) return false; + if (next_test >= c_.GetDataCount()) return false; DataTask* t = new DataTask{this, next_test}; Status st = CreateAndSubmitThreadpoolWork(RunSingleDataItem, t, tpool_); if (!st.IsOK()) { @@ -76,22 +75,23 @@ bool PTestRunner::ScheduleNew() { void PTestRunner::OnTaskFinished(size_t, EXECUTE_RESULT, ORT_CALLBACK_INSTANCE pci) noexcept { try { ScheduleNew(); - if (++finished == c_->GetDataCount()) { + if (++finished == c_.GetDataCount()) { //For each test case, only one DataTask can reach here - finish(pci); + Finish(pci); } } catch (std::exception& ex) { - LOGF_DEFAULT(ERROR, "%s:unrecoverable 
error:%s,exit...\n", c_->GetTestCaseName().c_str(), ex.what()); + LOGF_DEFAULT(ERROR, "%s:unrecoverable error:%s,exit...\n", c_.GetTestCaseName().c_str(), ex.what()); abort(); } catch (...) { - LOGF_DEFAULT(ERROR, "%s:unrecoverable error,exit...\n", c_->GetTestCaseName().c_str()); + LOGF_DEFAULT(ERROR, "%s:unrecoverable error,exit...\n", c_.GetTestCaseName().c_str()); abort(); } } PTestRunner::PTestRunner(OrtSession* session1, - ITestCase* c, PThreadPool tpool, - TestCaseCallBack on_finished1) : DataRunner(session1, c->GetTestCaseName(), c, on_finished1), next_test_to_run(0), finished(0), tpool_(tpool) { + const ITestCase& c, PThreadPool tpool, + TestCaseCallBack on_finished1) + : DataRunner(session1, c.GetTestCaseName(), c, on_finished1), next_test_to_run(0), finished(0), tpool_(tpool) { } void ORT_CALLBACK RunSingleDataItem(ORT_CALLBACK_INSTANCE instance, void* context, ORT_WORK work) { @@ -100,34 +100,35 @@ void ORT_CALLBACK RunSingleDataItem(ORT_CALLBACK_INSTANCE instance, void* contex PTestRunner* env = task->env; const size_t task_id = task->task_id; delete task; - env->RunTask(task_id, instance, true); + env->RunTask(task_id, instance); } Status OnTestCaseFinished(ORT_CALLBACK_INSTANCE pci, TestCaseTask* task, std::shared_ptr result) { - FixedCountFinishCallback* finished = task->env.finished; auto task_id = task->task_id; bool failed = false; - { - std::unique_ptr unused(task); - TestEnv& env = task->env; - int next_test = env.next_test_to_run++; - if (static_cast(next_test) < env.tests.size()) { - //schedule the next TestCase - std::unique_ptr t(new TestCaseTask{env, next_test, task->concurrent_runs, task->repeat_count, task->pool}); - Status st = CreateAndSubmitThreadpoolWork(RunTestCase, t.get(), task->pool); - if (st.IsOK()) { - t.release(); - } else - return st; + + std::unique_ptr delete_finished_task(task); + + TestEnv& env = task->env; + int next_test = env.next_test_to_run++; + if (static_cast(next_test) < env.tests.size()) { + //schedule the next 
TestCase + std::unique_ptr t(new TestCaseTask{env, next_test, task->concurrent_runs, task->repeat_count, + task->pool}); + Status st = CreateAndSubmitThreadpoolWork(RunTestCase, t.get(), task->pool); + if (st.IsOK()) { + t.release(); + } else { + return st; } } - if (failed) - return finished->fail(pci); - return finished->onFinished(task_id, result, pci); + + return failed ? env.finished->Fail(pci) : env.finished->OnFinished(task_id, result, pci); } //Do not run this function in the thread pool passed in -static Status ParallelRunTests(TestEnv& env, int p_models, size_t current_runs, size_t repeat_count, PThreadPool pool) { +static Status ParallelRunTests(TestEnv& env, int p_models, size_t current_runs, size_t repeat_count, + PThreadPool pool) { p_models = static_cast(std::min(p_models, env.tests.size())); LOGF_DEFAULT(ERROR, "Running tests in parallel: at most %d models at any time", p_models); env.next_test_to_run = p_models; @@ -144,7 +145,7 @@ static Status ParallelRunTests(TestEnv& env, int p_models, size_t current_runs, throw; } } - bool ret = env.finished->wait(); + bool ret = env.finished->Wait(); if (!ret) { return Status(::onnxruntime::common::ONNXRUNTIME, ::onnxruntime::common::FAIL, "ParallelRunTests failed"); } @@ -155,10 +156,12 @@ static Status ParallelRunTests(TestEnv& env, int p_models, size_t current_runs, Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool) { TestResultStat& stat = env.stat; stat.total_model_count = env.tests.size(); - stat.total_test_case_count = std::accumulate(env.tests.begin(), env.tests.end(), static_cast(0), [](size_t v, const ITestCase* info) { - return info->GetDataCount() + v; - }); + stat.total_test_case_count = std::accumulate(env.tests.begin(), env.tests.end(), static_cast(0), + [](size_t v, const ITestCase* info) { + return info->GetDataCount() + v; + }); std::vector> results; + if (p_models > 1 && env.tests.size() > 1) { ORT_RETURN_IF_ERROR(ParallelRunTests(env, 
p_models, concurrent_runs, repeat_count, tpool)); results = env.finished->getResults(); @@ -169,10 +172,12 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c ORT_EVENT ev; ORT_RETURN_IF_ERROR(CreateOnnxRuntimeEvent(&ev)); try { - RunSingleTestCase(env.tests[i], env.env, env.sf, concurrent_runs, repeat_count, tpool, nullptr, [&results, ev](std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { - results.push_back(result); - return OnnxRuntimeSetEventWhenCallbackReturns(pci, ev); - }); + RunSingleTestCase(*env.tests[i], env.env, env.sf, concurrent_runs, repeat_count, tpool, nullptr, + [&results, ev](std::shared_ptr result, ORT_CALLBACK_INSTANCE pci) { + results.push_back(result); + return OnnxRuntimeSetEventWhenCallbackReturns(pci, ev); + }); + ORT_RETURN_IF_ERROR(WaitAndCloseEvent(ev)); } catch (std::exception& ex) { LOGF_DEFAULT(ERROR, "Test %s failed:%s", test_case_name, ex.what()); @@ -185,13 +190,15 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c } for (size_t i = 0; i != env.tests.size(); ++i) { if (!results[i]) { - stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), env.tests[i]->GetTestCaseVersion())); + stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), + env.tests[i]->GetTestCaseVersion())); continue; } const TestCaseResult& r = *results[i]; for (const EXECUTE_RESULT res : r.GetExcutionResult()) { if (res != EXECUTE_RESULT::SUCCESS && res != EXECUTE_RESULT::NOT_SUPPORT) { - stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), env.tests[i]->GetTestCaseVersion())); + stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), + env.tests[i]->GetTestCaseVersion())); } switch (res) { case EXECUTE_RESULT::SUCCESS: @@ -239,7 +246,7 @@ void LoadTests(const std::vector>& input_paths const std::vector>& whitelisted_test_cases, double default_per_sample_tolerance, double default_relative_per_sample_tolerance, const std::unordered_set>& disabled_tests, - const 
std::function& process_function) { + const std::function)>& process_function) { std::vector> paths(input_paths); while (!paths.empty()) { std::basic_string node_data_root_path = paths.back(); @@ -266,22 +273,31 @@ void LoadTests(const std::vector>& input_paths std::basic_string p = ConcatPathComponent(node_data_root_path, filename_str); - ITestCase* l = CreateOnnxTestCase(ToMBString(test_case_name), TestModelInfo::LoadOnnxModel(p.c_str()), - default_per_sample_tolerance, default_relative_per_sample_tolerance); - process_function(l); + std::unique_ptr model_info(TestModelInfo::LoadOnnxModel(p.c_str())); + std::unique_ptr l = CreateOnnxTestCase(ToMBString(test_case_name), std::move(model_info), + default_per_sample_tolerance, + default_relative_per_sample_tolerance); + process_function(std::move(l)); return true; }); } } SeqTestRunner::SeqTestRunner(OrtSession* session1, - ITestCase* c, size_t repeat_count, - TestCaseCallBack on_finished1) : DataRunner(session1, c->GetTestCaseName(), c, on_finished1), repeat_count_(repeat_count) { + const ITestCase& c, size_t repeat_count, + TestCaseCallBack on_finished1) + : DataRunner(session1, c.GetTestCaseName(), c, on_finished1), repeat_count_(repeat_count) { } -DataRunner::DataRunner(OrtSession* session1, const std::string& test_case_name1, ITestCase* c, TestCaseCallBack on_finished1) : test_case_name_(test_case_name1), c_(c), session(session1), on_finished(on_finished1), default_allocator(onnxruntime::make_unique()) { - std::string s = c->GetNodeName(); - result = std::make_shared(c->GetDataCount(), EXECUTE_RESULT::UNKNOWN_ERROR, s); +DataRunner::DataRunner(OrtSession* session1, const std::string& test_case_name1, const ITestCase& c, + TestCaseCallBack on_finished1) + : test_case_name_(test_case_name1), + c_(c), + session(session1), + on_finished(on_finished1), + default_allocator(onnxruntime::make_unique()) { + std::string s = c_.GetNodeName(); + result = std::make_shared(c_.GetDataCount(), EXECUTE_RESULT::NOT_SET, s); 
SetTimeSpecToZero(&spent_time_); } @@ -289,24 +305,23 @@ DataRunner::~DataRunner() { Ort::GetApi().ReleaseSession(session); } -void DataRunner::RunTask(size_t task_id, ORT_CALLBACK_INSTANCE pci, bool store_result) { +void DataRunner::RunTask(size_t task_id, ORT_CALLBACK_INSTANCE pci) { EXECUTE_RESULT res = EXECUTE_RESULT::UNKNOWN_ERROR; try { res = RunTaskImpl(task_id); } catch (std::exception& ex) { res = EXECUTE_RESULT::WITH_EXCEPTION; - LOGS_DEFAULT(ERROR) << c_->GetTestCaseName() << ":" << ex.what(); - } - if (store_result) { - result->SetResult(task_id, res); + LOGS_DEFAULT(ERROR) << c_.GetTestCaseName() << ":" << ex.what(); } + + result->SetResult(task_id, res); OnTaskFinished(task_id, res, pci); } EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { onnxruntime::test::HeapBuffer holder; std::unordered_map feeds; - c_->LoadTestData(task_id, holder, feeds, true); + c_.LoadTestData(task_id, holder, feeds, true); // Create output feed size_t output_count = 0; @@ -341,8 +356,8 @@ EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { } GetMonotonicTimeCounter(&start_time); Ort::ThrowOnError(Ort::GetApi().Run(session, nullptr, input_names.data(), input_values.Data(), - static_cast(input_values.Length()), output_names_raw_ptr.data(), output_count, - output_values.Data())); + static_cast(input_values.Length()), output_names_raw_ptr.data(), + output_count, output_values.Data())); } GetMonotonicTimeCounter(&end_time); AccumulateTimeSpec(&spent_time_, &start_time, &end_time); @@ -351,22 +366,22 @@ EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { double relative_per_sample_tolerance; bool post_procesing; Status status; - if (!(status = c_->GetPerSampleTolerance(&per_sample_tolerance)).IsOK()) { + if (!(status = c_.GetPerSampleTolerance(&per_sample_tolerance)).IsOK()) { LOGS_DEFAULT(ERROR) << status.ErrorMessage() << "\n"; return StatusCodeToExecuteResult(status.Code()); } - if (!(status = 
c_->GetRelativePerSampleTolerance(&relative_per_sample_tolerance)).IsOK()) { + if (!(status = c_.GetRelativePerSampleTolerance(&relative_per_sample_tolerance)).IsOK()) { LOGS_DEFAULT(ERROR) << status.ErrorMessage() << "\n"; return StatusCodeToExecuteResult(status.Code()); } - if (!(status = c_->GetPostProcessing(&post_procesing)).IsOK()) { + if (!(status = c_.GetPostProcessing(&post_procesing)).IsOK()) { LOGS_DEFAULT(ERROR) << status.ErrorMessage() << "\n"; return StatusCodeToExecuteResult(status.Code()); } //TODO: if there are no output value files, just skip the validation std::unordered_map expected_output_values; - c_->LoadTestData(task_id, holder, expected_output_values, false); + c_.LoadTestData(task_id, holder, expected_output_values, false); std::unordered_map name_fetch_output_map; std::unordered_map name_output_value_info_proto; @@ -374,7 +389,7 @@ EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { for (auto& output_name : output_names) { // p_fetches is filled in the order of output_names. 
name_fetch_output_map[output_name] = output_values.Get(i); - const ONNX_NAMESPACE::ValueInfoProto* infoProto = c_->GetOutputInfoFromModel(i); + const ONNX_NAMESPACE::ValueInfoProto* infoProto = c_.GetOutputInfoFromModel(i); if (infoProto != nullptr) name_output_value_info_proto.insert(std::make_pair(infoProto->name(), infoProto)); i++; } @@ -447,44 +462,54 @@ EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { } void SeqTestRunner::Start(ORT_CALLBACK_INSTANCE pci, size_t) { - const size_t data_count = c_->GetDataCount(); - for (size_t idx_repeat = 0; idx_repeat != repeat_count_; ++idx_repeat) + const size_t data_count = c_.GetDataCount(); + for (size_t idx_repeat = 0; idx_repeat != repeat_count_; ++idx_repeat) { for (size_t idx_data = 0; idx_data != data_count; ++idx_data) { - RunTask(idx_data, nullptr, idx_repeat == 0); + RunTask(idx_data, nullptr); } - finish(pci); + } + + Finish(pci); } -void RunSingleTestCase(ITestCase* info, Ort::Env& env, const Ort::SessionOptions& sf, size_t concurrent_runs, size_t repeat_count, PThreadPool tpool, ORT_CALLBACK_INSTANCE pci, TestCaseCallBack on_finished) { - +void RunSingleTestCase(const ITestCase& info, Ort::Env& env, const Ort::SessionOptions& sf, + size_t concurrent_runs, size_t repeat_count, PThreadPool tpool, + ORT_CALLBACK_INSTANCE pci, TestCaseCallBack on_finished) { std::shared_ptr ret; - size_t data_count = info->GetDataCount(); + size_t data_count = info.GetDataCount(); try { - DataRunner* r = nullptr; - std::string node_name = info->GetNodeName(); + std::unique_ptr r; + std::string node_name = info.GetNodeName(); auto sf2 = sf.Clone(); - sf2.SetLogId(info->GetTestCaseName().c_str()); - Ort::Session session_object{env, info->GetModelUrl(), sf2}; - LOGF_DEFAULT(INFO, "testing %s\n", info->GetTestCaseName().c_str()); + sf2.SetLogId(info.GetTestCaseName().c_str()); + Ort::Session session_object{env, info.GetModelUrl(), sf2}; + LOGF_DEFAULT(INFO, "testing %s\n", info.GetTestCaseName().c_str()); //temp hack. 
Because we have no resource control. We may not have enough memory to run this test in parallel - if (info->GetTestCaseName() == "coreml_FNS-Candy_ImageNet") + if (info.GetTestCaseName() == "coreml_FNS-Candy_ImageNet") { concurrent_runs = 1; + } + if (concurrent_runs > 1 && data_count > 1) { - r = new PTestRunner(session_object.release(), info, tpool, on_finished); + r.reset(new PTestRunner(session_object.release(), info, tpool, on_finished)); } else { - r = new SeqTestRunner(session_object.release(), info, repeat_count, on_finished); + r.reset(new SeqTestRunner(session_object.release(), info, repeat_count, on_finished)); } r->Start(pci, concurrent_runs); + + // DataRunner::Finish will delete itself, so now that we know everything has started without any exceptions + // we can release it from the unique_ptr + r.release(); return; } catch (const Ort::Exception& ex) { - if (ex.GetOrtErrorCode() != ORT_NOT_IMPLEMENTED) + if (ex.GetOrtErrorCode() != ORT_NOT_IMPLEMENTED) { throw; + } - LOGF_DEFAULT(ERROR, "Test %s failed:%s", info->GetTestCaseName().c_str(), ex.what()); + LOGF_DEFAULT(ERROR, "Test %s failed:%s", info.GetTestCaseName().c_str(), ex.what()); std::string node_name; ret = std::make_shared(data_count, EXECUTE_RESULT::NOT_SUPPORT, ""); } catch (onnxruntime::NotImplementedException& ex) { - LOGF_DEFAULT(ERROR, "Test %s failed:%s", info->GetTestCaseName().c_str(), ex.what()); + LOGF_DEFAULT(ERROR, "Test %s failed:%s", info.GetTestCaseName().c_str(), ex.what()); std::string node_name; ret = std::make_shared(data_count, EXECUTE_RESULT::NOT_SUPPORT, ""); } diff --git a/onnxruntime/test/onnx/runner.h b/onnxruntime/test/onnx/runner.h index 5f07b6bd78..42973fc241 100644 --- a/onnxruntime/test/onnx/runner.h +++ b/onnxruntime/test/onnx/runner.h @@ -15,7 +15,8 @@ #include "testenv.h" #include "sync_api.h" -typedef std::function<::onnxruntime::common::Status(std::shared_ptr result, ORT_CALLBACK_INSTANCE pci)> TestCaseCallBack; +typedef std::function result, 
ORT_CALLBACK_INSTANCE pci)> + TestCaseCallBack; struct TestCaseTask { TestEnv& env; @@ -28,7 +29,8 @@ struct TestCaseTask { void ORT_CALLBACK RunTestCase(ORT_CALLBACK_INSTANCE instance, void* context, ORT_WORK work); void ORT_CALLBACK RunSingleDataItem(ORT_CALLBACK_INSTANCE instance, void* context, ORT_WORK work); -::onnxruntime::common::Status OnTestCaseFinished(ORT_CALLBACK_INSTANCE pci, TestCaseTask* task, std::shared_ptr result); +::onnxruntime::common::Status OnTestCaseFinished(ORT_CALLBACK_INSTANCE pci, TestCaseTask* task, + std::shared_ptr result); struct MockedOrtAllocator; @@ -37,7 +39,7 @@ class DataRunner { typedef TestCaseCallBack CALL_BACK; std::shared_ptr result; std::string test_case_name_; - ITestCase* c_; + const ITestCase& c_; //Time spent in Session::Run. It only make sense when SeqTestRunner was used ::onnxruntime::TIME_SPEC spent_time_; @@ -49,14 +51,15 @@ class DataRunner { ORT_DISALLOW_COPY_AND_ASSIGNMENT(DataRunner); public: - DataRunner(OrtSession* session1, const std::string& test_case_name1, ITestCase* c, TestCaseCallBack on_finished1); + DataRunner(OrtSession* session1, const std::string& test_case_name1, const ITestCase& c, + TestCaseCallBack on_finished1); virtual void OnTaskFinished(size_t task_id, EXECUTE_RESULT res, ORT_CALLBACK_INSTANCE pci) noexcept = 0; - void RunTask(size_t task_id, ORT_CALLBACK_INSTANCE pci, bool store_result); + void RunTask(size_t task_id, ORT_CALLBACK_INSTANCE pci); virtual ~DataRunner(); virtual void Start(ORT_CALLBACK_INSTANCE pci, size_t concurrent_runs) = 0; - void finish(ORT_CALLBACK_INSTANCE pci) { + void Finish(ORT_CALLBACK_INSTANCE pci) { std::shared_ptr res = result; CALL_BACK callback = on_finished; res->SetSpentTime(spent_time_); @@ -64,7 +67,7 @@ class DataRunner { for (size_t i = 0; i != er.size(); ++i) { EXECUTE_RESULT r = er[i]; if (r == EXECUTE_RESULT::SUCCESS) continue; - std::string s = c_->GetDatasetDebugInfoString(i); + std::string s = c_.GetDatasetDebugInfoString(i); switch (r) { case 
EXECUTE_RESULT::RESULT_DIFFERS: LOGF_DEFAULT(ERROR, "%s: result differs. Dataset:%s\n", test_case_name_.c_str(), s.c_str()); @@ -96,9 +99,7 @@ class SeqTestRunner : public DataRunner { size_t repeat_count_; public: - SeqTestRunner(OrtSession* session1, - ITestCase* c, size_t repeat_count, - TestCaseCallBack on_finished1); + SeqTestRunner(OrtSession* session1, const ITestCase& c, size_t repeat_count, TestCaseCallBack on_finished1); void Start(ORT_CALLBACK_INSTANCE pci, size_t concurrent_runs) override; void OnTaskFinished(size_t, EXECUTE_RESULT, ORT_CALLBACK_INSTANCE) noexcept override {} @@ -113,9 +114,7 @@ class PTestRunner : public DataRunner { public: void Start(ORT_CALLBACK_INSTANCE pci, size_t concurrent_runs) override; - PTestRunner(OrtSession* session1, - ITestCase* c, PThreadPool tpool, - TestCaseCallBack on_finished1); + PTestRunner(OrtSession* session1, const ITestCase& c, PThreadPool tpool, TestCaseCallBack on_finished1); private: bool ScheduleNew(); @@ -131,10 +130,10 @@ void LoadTests(const std::vector>& input_paths const std::vector>& whitelisted_test_cases, double default_per_sample_tolerance, double default_relative_per_sample_tolerance, const std::unordered_set>& disabled_tests, - const std::function& process_function); + const std::function)>& process_function); //Do not run this function in the thread pool passed in -::onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool); +onnxruntime::common::Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_count, PThreadPool tpool); EXECUTE_RESULT StatusCodeToExecuteResult(int input); -void RunSingleTestCase(ITestCase* info, Ort::Env& env, const Ort::SessionOptions& sf, size_t concurrent_runs, +void RunSingleTestCase(const ITestCase& info, Ort::Env& env, const Ort::SessionOptions& sf, size_t concurrent_runs, size_t repeat_count, PThreadPool tpool, ORT_CALLBACK_INSTANCE pci, TestCaseCallBack on_finished); diff 
--git a/onnxruntime/test/onnx/sync_api.cc b/onnxruntime/test/onnx/sync_api.cc index 2655ab07a2..c16a97c97b 100644 --- a/onnxruntime/test/onnx/sync_api.cc +++ b/onnxruntime/test/onnx/sync_api.cc @@ -88,8 +88,8 @@ Status OnnxRuntimeSetEventWhenCallbackReturns(ORT_CALLBACK_INSTANCE pci, ORT_EVE finish_event->finish_event_data.notify_all(); return Status::OK(); } - pci->AddEvent(finish_event); - return Status::OK(); + pci->AddEvent(finish_event); + return Status::OK(); } void OnnxRuntimeCallbackInstance::AddEvent(ORT_EVENT event) { diff --git a/onnxruntime/test/onnx/testenv.cc b/onnxruntime/test/onnx/testenv.cc index b5cb918605..3d27ebb4c9 100644 --- a/onnxruntime/test/onnx/testenv.cc +++ b/onnxruntime/test/onnx/testenv.cc @@ -6,10 +6,16 @@ #include using onnxruntime::Status; -TestEnv::TestEnv(const std::vector& tests1, TestResultStat& stat1, Ort::Env& env1, Ort::SessionOptions& sf1) - : tests(tests1), next_test_to_run(0), stat(stat1), finished(new FixedCountFinishCallback(static_cast(tests1.size()))), env(env1), sf(sf1) { +TestEnv::TestEnv(const std::vector& tests1, TestResultStat& stat1, Ort::Env& env1, + Ort::SessionOptions& sf1) + : tests(tests1), + next_test_to_run(0), + stat(stat1), + finished(new FixedCountFinishCallback(static_cast(tests1.size()))), + env(env1), + sf(sf1) { } TestEnv::~TestEnv() { - delete finished; + // need dtor in .cc so 'finished' can be cleaned up as TestCaseResult only has a forward declaration in the header. 
} diff --git a/onnxruntime/test/onnx/testenv.h b/onnxruntime/test/onnx/testenv.h index 6bc3607da4..4b2716faea 100644 --- a/onnxruntime/test/onnx/testenv.h +++ b/onnxruntime/test/onnx/testenv.h @@ -19,7 +19,7 @@ class TestEnv { std::vector tests; std::atomic_int next_test_to_run; TestResultStat& stat; - FixedCountFinishCallback* finished; + std::unique_ptr finished; Ort::Env& env; const Ort::SessionOptions& sf; TestEnv(const std::vector& tests, TestResultStat& stat1, Ort::Env& env, Ort::SessionOptions& sf1); diff --git a/onnxruntime/test/perftest/TFModelInfo.cc b/onnxruntime/test/perftest/TFModelInfo.cc index 856706082a..faf7bccccb 100644 --- a/onnxruntime/test/perftest/TFModelInfo.cc +++ b/onnxruntime/test/perftest/TFModelInfo.cc @@ -7,7 +7,7 @@ #include -TestModelInfo* TFModelInfo::Create(_In_ const PATH_CHAR_TYPE* model_url) { +std::unique_ptr TFModelInfo::Create(_In_ const PATH_CHAR_TYPE* model_url) { auto ret = std::unique_ptr(new TFModelInfo{}); ret->model_url_ = model_url; std::basic_string meta_file_path = model_url; @@ -44,11 +44,11 @@ TestModelInfo* TFModelInfo::Create(_In_ const PATH_CHAR_TYPE* model_url) { } else if (line.compare(0, 7, "output=") == 0) { ret->output_names_.push_back(line.substr(7)); } else { - ORT_THROW("unknow line:", line.size()); + ORT_THROW("unknown line:", line.size()); } } - return ret.release(); + return std::move(ret); } int TFModelInfo::GetInputCount() const { return static_cast(input_names_.size()); } diff --git a/onnxruntime/test/perftest/TFModelInfo.h b/onnxruntime/test/perftest/TFModelInfo.h index 2d90c3bd25..21dddca050 100644 --- a/onnxruntime/test/perftest/TFModelInfo.h +++ b/onnxruntime/test/perftest/TFModelInfo.h @@ -20,7 +20,7 @@ class TFModelInfo : public TestModelInfo { const std::string& GetOutputName(size_t i) const override; ~TFModelInfo() override = default; - static TestModelInfo* Create(_In_ const PATH_CHAR_TYPE* model_url); + static std::unique_ptr Create(_In_ const PATH_CHAR_TYPE* model_url); private: 
TFModelInfo() = default; diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 97ae257c7b..ed139249c1 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -25,7 +25,7 @@ namespace perftest { /*static*/ void CommandLineParser::ShowUsage() { printf( - "perf_test [options...] model_path result_file\n" + "perf_test [options...] model_path [result_file]\n" "Options:\n" "\t-m [test_mode]: Specifies the test mode. Value could be 'duration' or 'times'.\n" "\t\tProvide 'duration' to run the test for a fix duration, and 'times' to repeated for a certain times. \n" @@ -40,7 +40,7 @@ namespace perftest { "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" "\t-t [seconds_to_run]: Specifies the seconds to run for 'duration' mode. Default:600.\n" "\t-p [profile_file]: Specifies the profile name to enable profiling and dump the profile data to the file.\n" - "\t-s: Show statistics result, like P75, P90.\n" + "\t-s: Show statistics result, like P75, P90. If no result_file provided this defaults to on.\n" "\t-v: Show verbose information.\n" "\t-x [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes, A value of 0 means ORT will pick a default. Must >=0.\n" "\t-y [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means ORT will pick a default. 
Must >=0.\n" @@ -187,10 +187,19 @@ namespace perftest { // parse model_path and result_file_path argc -= optind; argv += optind; - if (argc != 2) return false; + + switch (argc) { + case 2: + test_config.model_info.result_file_path = argv[1]; + break; + case 1: + test_config.run_config.f_dump_statistics = true; + break; + default: + return false; + } test_config.model_info.model_file_path = argv[0]; - test_config.model_info.result_file_path = argv[1]; return true; } diff --git a/onnxruntime/test/perftest/main.cc b/onnxruntime/test/perftest/main.cc index 9feaa78789..a7be6335e3 100644 --- a/onnxruntime/test/perftest/main.cc +++ b/onnxruntime/test/perftest/main.cc @@ -6,6 +6,7 @@ #include #include "command_args_parser.h" #include "performance_runner.h" +#include using namespace onnxruntime; const OrtApi* g_ort = NULL; @@ -66,5 +67,8 @@ int main(int argc, char* argv[]) { fprintf(stderr, "%s\n", ex.what()); retval = -1; } + + ::google::protobuf::ShutdownProtobufLibrary(); + return retval; } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index c3270d78f3..ae86b03243 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -26,13 +26,15 @@ std::chrono::duration OnnxRuntimeTestSession::Run() { OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device& rd, const PerformanceTestConfig& performance_test_config, - const TestModelInfo* m) - : rand_engine_(rd()), input_names_(m->GetInputCount()), input_length_(m->GetInputCount()) { + const TestModelInfo& m) + : rand_engine_(rd()), input_names_(m.GetInputCount()), input_length_(m.GetInputCount()) { Ort::SessionOptions session_options; const std::string& provider_name = performance_test_config.machine_config.provider_type_name; if (provider_name == onnxruntime::kDnnlExecutionProvider) { #ifdef USE_DNNL - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, 
performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); + Ort::ThrowOnError( + OrtSessionOptionsAppendExecutionProvider_Dnnl(session_options, + performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); #else ORT_THROW("DNNL is not supported in this build\n"); #endif @@ -81,8 +83,9 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kAclExecutionProvider) { #ifdef USE_ACL - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ACL(session_options, - performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); + Ort::ThrowOnError( + OrtSessionOptionsAppendExecutionProvider_ACL(session_options, + performance_test_config.run_config.enable_cpu_mem_arena ? 1 : 0)); #else ORT_THROW("Acl is not supported in this build\n"); #endif @@ -146,32 +149,32 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device output_names_raw_ptr[i] = output_names_[i].c_str(); } - size_t input_count = static_cast(m->GetInputCount()); + size_t input_count = static_cast(m.GetInputCount()); for (size_t i = 0; i != input_count; ++i) { - input_names_[i] = strdup(m->GetInputName(i).c_str()); + input_names_[i] = strdup(m.GetInputName(i).c_str()); } } -bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData() -{ +bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData() { // iterate over all input nodes for (size_t i = 0; i < static_cast(input_length_); i++) { Ort::TypeInfo type_info = session_.GetInputTypeInfo(i); Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); if (type_info.GetONNXType() == ONNX_TYPE_TENSOR) { - auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); - std::vector input_node_dim = tensor_info.GetShape(); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector input_node_dim = tensor_info.GetShape(); - // free dimensions are treated as 1 - for (int64_t& dim : input_node_dim) 
{ - if (dim == -1) { - dim = 1; - } + // free dimensions are treated as 1 + for (int64_t& dim : input_node_dim) { + if (dim == -1) { + dim = 1; } - // default allocator doesn't have to be freed by user - auto allocator = static_cast(Ort::AllocatorWithDefaultOptions()); - Ort::Value input_tensor = Ort::Value::CreateTensor(allocator, (const int64_t*)input_node_dim.data(), input_node_dim.size(), tensor_info.GetElementType()); - PreLoadTestData(0, i, input_tensor.release()); + } + // default allocator doesn't have to be freed by user + auto allocator = static_cast(Ort::AllocatorWithDefaultOptions()); + Ort::Value input_tensor = Ort::Value::CreateTensor(allocator, (const int64_t*)input_node_dim.data(), + input_node_dim.size(), tensor_info.GetElementType()); + PreLoadTestData(0, i, input_tensor.release()); } } return true; diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h index 7cfbe8ea6e..b67837754e 100644 --- a/onnxruntime/test/perftest/ort_test_session.h +++ b/onnxruntime/test/perftest/ort_test_session.h @@ -12,7 +12,7 @@ namespace perftest { class OnnxRuntimeTestSession : public TestSession { public: OnnxRuntimeTestSession(Ort::Env& env, std::random_device& rd, const PerformanceTestConfig& performance_test_config, - const TestModelInfo* m); + const TestModelInfo& m); void PreLoadTestData(size_t test_data_id, size_t input_id, OrtValue* value) override { if (test_inputs_.size() < test_data_id + 1) { diff --git a/onnxruntime/test/perftest/performance_runner.cc b/onnxruntime/test/perftest/performance_runner.cc index e31720d019..be2184c92b 100644 --- a/onnxruntime/test/perftest/performance_runner.cc +++ b/onnxruntime/test/perftest/performance_runner.cc @@ -41,6 +41,64 @@ Eigen::ThreadPoolInterface* GetDefaultThreadPool(const onnxruntime::Env& env) { namespace onnxruntime { namespace perftest { + +void PerformanceResult::DumpToFile(const std::basic_string& path, bool f_include_statistics) const { + bool have_file = 
!path.empty(); + std::ofstream outfile; + + if (have_file) { + outfile.open(path, std::ofstream::out | std::ofstream::app); + if (!outfile.good()) { + // at least provide some info on the run + std::cerr << "failed to open result file '" << path.c_str() << "'. will dump stats to output.\n"; + have_file = false; + f_include_statistics = true; + } + } + + if (have_file) { + for (size_t runs = 0; runs < time_costs.size(); runs++) { + outfile << model_name << "," << time_costs[runs] << "," << peak_workingset_size << "," + << average_CPU_usage << "," << runs << std::endl; + } + } else { + // match formatting of the initial output from PerformanceRunner::Run + std::cout << "Avg CPU usage:" << average_CPU_usage + << "\nPeak working set size:" << peak_workingset_size + << "\nRuns:" << time_costs.size() << std::endl; + } + + if (!time_costs.empty() && f_include_statistics) { + std::vector sorted_time = time_costs; + + size_t total = sorted_time.size(); + size_t n50 = static_cast(total * 0.5); + size_t n90 = static_cast(total * 0.9); + size_t n95 = static_cast(total * 0.95); + size_t n99 = static_cast(total * 0.99); + size_t n999 = static_cast(total * 0.999); + + std::sort(sorted_time.begin(), sorted_time.end()); + + auto output_stats = [&](std::ostream& ostream) { + ostream << "Min Latency is " << sorted_time[0] << "sec\n"; + ostream << "Max Latency is " << sorted_time[total - 1] << "sec\n"; + ostream << "P50 Latency is " << sorted_time[n50] << "sec\n"; + ostream << "P90 Latency is " << sorted_time[n90] << "sec\n"; + ostream << "P95 Latency is " << sorted_time[n95] << "sec\n"; + ostream << "P99 Latency is " << sorted_time[n99] << "sec\n"; + ostream << "P999 Latency is " << sorted_time[n999] << "sec" << std::endl; + }; + + if (have_file) { + outfile << std::endl; + output_stats(outfile); + } + + output_stats(std::cout); + } +} + Status PerformanceRunner::Run() { if (!Initialize()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "failed to initialize."); @@ -51,7 +109,7 @@ Status 
PerformanceRunner::Run() { // TODO: start profiling // if (!performance_test_config_.run_config.profile_file.empty()) - performance_result_.start_ = std::chrono::high_resolution_clock::now(); + performance_result_.start = std::chrono::high_resolution_clock::now(); std::unique_ptr p_ICPUUsage = utils::CreateICPUUsage(); switch (performance_test_config_.run_config.test_mode) { @@ -64,7 +122,7 @@ Status PerformanceRunner::Run() { default: return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "unknown test mode."); } - performance_result_.end_ = std::chrono::high_resolution_clock::now(); + performance_result_.end = std::chrono::high_resolution_clock::now(); performance_result_.average_CPU_usage = p_ICPUUsage->GetUsage(); performance_result_.peak_workingset_size = utils::GetPeakWorkingSetSize(); @@ -72,7 +130,7 @@ Status PerformanceRunner::Run() { std::chrono::duration session_create_duration = session_create_end_ - session_create_start_; // TODO: end profiling // if (!performance_test_config_.run_config.profile_file.empty()) session_object->EndProfiling(); - std::chrono::duration inference_duration = performance_result_.end_ - performance_result_.start_; + std::chrono::duration inference_duration = performance_result_.end - performance_result_.start; std::cout << "Session creation time cost:" << session_create_duration.count() << " s" << std::endl << "Total inference time cost:" << performance_result_.total_time_cost << " s" << std::endl // sum of time taken by each request @@ -168,21 +226,24 @@ Status PerformanceRunner::ForkJoinRepeat() { return Status::OK(); } -static TestModelInfo* CreateModelInfo(const PerformanceTestConfig& performance_test_config_) { +static std::unique_ptr CreateModelInfo(const PerformanceTestConfig& performance_test_config_) { if (CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("ort")) == 0) { return TestModelInfo::LoadOnnxModel(performance_test_config_.model_info.model_file_path.c_str()); } + if 
(CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("tf")) == 0) { return TFModelInfo::Create(performance_test_config_.model_info.model_file_path.c_str()); } + ORT_NOT_IMPLEMENTED(ToMBString(performance_test_config_.backend), " is not supported"); } -static TestSession* CreateSession(Ort::Env& env, std::random_device& rd, - const PerformanceTestConfig& performance_test_config_, - TestModelInfo* test_model_info) { +static std::unique_ptr CreateSession(Ort::Env& env, std::random_device& rd, + const PerformanceTestConfig& performance_test_config_, + const TestModelInfo& test_model_info) { if (CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("ort")) == 0) { - return new OnnxRuntimeTestSession(env, rd, performance_test_config_, test_model_info); + return std::unique_ptr( + new OnnxRuntimeTestSession(env, rd, performance_test_config_, test_model_info)); } #ifdef HAVE_TENSORFLOW if (CompareCString(performance_test_config_.backend.c_str(), ORT_TSTR("tf")) == 0) { @@ -191,11 +252,12 @@ static TestSession* CreateSession(Ort::Env& env, std::random_device& rd, #endif ORT_NOT_IMPLEMENTED(ToMBString(performance_test_config_.backend), " is not supported"); } + PerformanceRunner::PerformanceRunner(Ort::Env& env, const PerformanceTestConfig& test_config, std::random_device& rd) : performance_test_config_(test_config), test_model_info_(CreateModelInfo(test_config)) { session_create_start_ = std::chrono::high_resolution_clock::now(); - session_.reset(CreateSession(env, rd, test_config, test_model_info_)); + session_ = CreateSession(env, rd, test_config, *test_model_info_); session_create_end_ = std::chrono::high_resolution_clock::now(); } @@ -216,7 +278,9 @@ bool PerformanceRunner::Initialize() { std::string narrow_model_name = ToMBString(model_name); performance_result_.model_name = narrow_model_name; - test_case_.reset(CreateOnnxTestCase(narrow_model_name, test_model_info_, 0.0, 0.0)); + // ownership semantics are a little unexpected here as the test 
case takes ownership of the model info + TestModelInfo* test_model_info = test_model_info_.get(); + test_case_ = CreateOnnxTestCase(narrow_model_name, std::move(test_model_info_), 0.0, 0.0); if (performance_test_config_.run_config.generate_model_input_binding) { return static_cast(session_.get())->PopulateGeneratedInputTestData(); @@ -232,19 +296,18 @@ bool PerformanceRunner::Initialize() { std::unordered_map feeds; test_case_->LoadTestData(test_data_id /* id */, b_, feeds, true); // Discard the names in feeds - int input_count = test_model_info_->GetInputCount(); + int input_count = test_model_info->GetInputCount(); for (int i = 0; i != input_count; ++i) { - auto iter = feeds.find(test_model_info_->GetInputName(i)); + auto iter = feeds.find(test_model_info->GetInputName(i)); if (iter == feeds.end()) { - std::cout << "there is no test input data for input " << test_model_info_->GetInputName(i) << " and model " + std::cout << "there is no test input data for input " << test_model_info->GetInputName(i) << " and model " << test_case_->GetTestCaseName() << std::endl; return false; } session_->PreLoadTestData(test_data_id, static_cast(i), iter->second); } } - test_case_.reset(nullptr); - test_model_info_ = nullptr; + return true; } diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index ce62ad58d7..09920366f0 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -28,55 +28,15 @@ namespace onnxruntime { namespace perftest { struct PerformanceResult { - std::chrono::time_point start_; - std::chrono::time_point end_; + std::chrono::time_point start; + std::chrono::time_point end; size_t peak_workingset_size{0}; short average_CPU_usage{0}; double total_time_cost{0}; std::vector time_costs; std::string model_name; - void DumpToFile(const std::basic_string& path, bool f_include_statistics = false) const { - std::ofstream outfile; - outfile.open(path, 
std::ofstream::out | std::ofstream::app); - if (!outfile.good()) { - printf("failed to open result file"); - return; - } - - for (size_t runs = 0; runs < time_costs.size(); runs++) { - outfile << model_name << "," << time_costs[runs] << "," << peak_workingset_size << "," << average_CPU_usage << "," << runs << std::endl; - } - - if (!time_costs.empty() && f_include_statistics) { - std::vector sorted_time = time_costs; - - size_t total = sorted_time.size(); - size_t n50 = static_cast(total * 0.5); - size_t n90 = static_cast(total * 0.9); - size_t n95 = static_cast(total * 0.95); - size_t n99 = static_cast(total * 0.99); - size_t n999 = static_cast(total * 0.999); - - std::sort(sorted_time.begin(), sorted_time.end()); - - outfile << std::endl; - auto output_stats = [&](std::ostream& ostream) { - ostream << "Min Latency is " << sorted_time[0] << "sec" << std::endl; - ostream << "Max Latency is " << sorted_time[total - 1] << "sec" << std::endl; - ostream << "P50 Latency is " << sorted_time[n50] << "sec" << std::endl; - ostream << "P90 Latency is " << sorted_time[n90] << "sec" << std::endl; - ostream << "P95 Latency is " << sorted_time[n95] << "sec" << std::endl; - ostream << "P99 Latency is " << sorted_time[n99] << "sec" << std::endl; - ostream << "P999 Latency is " << sorted_time[n999] << "sec" << std::endl; - }; - - output_stats(outfile); - output_stats(std::cout); - } - - outfile.close(); - } + void DumpToFile(const std::basic_string& path, bool f_include_statistics = false) const; }; class PerformanceRunner { @@ -143,7 +103,7 @@ class PerformanceRunner { std::chrono::time_point session_create_end_; PerformanceResult performance_result_; PerformanceTestConfig performance_test_config_; - TestModelInfo* test_model_info_; + std::unique_ptr test_model_info_; std::unique_ptr session_; onnxruntime::test::HeapBuffer b_; std::unique_ptr test_case_; diff --git a/onnxruntime/test/util/compare_ortvalue.cc b/onnxruntime/test/util/compare_ortvalue.cc index 1f9b39256d..fc3d229d68 
100644 --- a/onnxruntime/test/util/compare_ortvalue.cc +++ b/onnxruntime/test/util/compare_ortvalue.cc @@ -37,7 +37,6 @@ #pragma warning(pop) #endif - #include "core/graph/onnx_protobuf.h" #include "core/framework/tensorprotoutils.h" #include "core/framework/utils.h" @@ -90,7 +89,7 @@ std::pair CompareFloatResult(const Tensor& outvalue std::ostringstream oss; oss << std::hex << "expected " << expected_output[di] << " (" << expected_int << "), got " << real_value << " (" << real_int << ")" - << ", diff: " << diff << ", tol=" << tol << "."; + << ", diff: " << diff << ", tol=" << tol << std::dec << " idx=" << di << "."; res.second = oss.str(); max_diff = diff; }