From 1b1bddaab3aea5ce3c413fc6dadebb5fd9625ecc Mon Sep 17 00:00:00 2001 From: Michael Suo Date: Sun, 4 Aug 2019 15:59:58 -0700 Subject: [PATCH] Revert D16469619: Add Virtual Memory and CPU percentage computation to AIBench Differential Revision: D16469619 Original commit changeset: 670f3549c830 fbshipit-source-id: f55d4cda36f5e29df2df306d33a70158e5a7908b --- binaries/benchmark_args.h | 5 -- binaries/benchmark_helper.cc | 75 ++++--------------- binaries/benchmark_helper.h | 3 +- binaries/caffe2_benchmark.cc | 1 - modules/observers/net_observer_reporter.h | 1 - .../observers/net_observer_reporter_print.cc | 50 +++---------- modules/observers/perf_observer.cc | 57 +++++--------- modules/observers/perf_observer.h | 14 ++-- 8 files changed, 54 insertions(+), 152 deletions(-) diff --git a/binaries/benchmark_args.h b/binaries/benchmark_args.h index 8a7826d5f99..891f62622ae 100644 --- a/binaries/benchmark_args.h +++ b/binaries/benchmark_args.h @@ -50,11 +50,6 @@ C10_DEFINE_string( "Input type when specifying the input dimension." "The supported types are float, uint8_t."); C10_DEFINE_int(iter, 10, "The number of iterations to run."); -C10_DEFINE_bool( - measure_memory, - false, - "Whether to measure increase in allocated memory while " - "loading and running the net."); C10_DEFINE_string(net, "", "The given net to benchmark."); C10_DEFINE_string( output, diff --git a/binaries/benchmark_helper.cc b/binaries/benchmark_helper.cc index 958e0bf4efe..01d92a25bc7 100644 --- a/binaries/benchmark_helper.cc +++ b/binaries/benchmark_helper.cc @@ -35,14 +35,6 @@ #include #include -#if defined(TARGET_OS_MAC) || \ -defined(TARGET_OS_IPHONE) || \ -defined(TARGET_IPHONE_SIMULATOR) -#include -#else -#include -#endif - using std::map; using std::shared_ptr; using std::string; @@ -243,7 +235,7 @@ void fillInputBlob( void runNetwork( shared_ptr workspace, - caffe2::NetBase* net, + caffe2::NetDef& net_def, map& tensor_protos_map, const bool wipe_cache, const bool run_individual, @@ -258,6 +250,13 @@ void runNetwork( const std::string& output, const std::string& output_folder) { + if (!net_def.has_name()) { + net_def.set_name("benchmark"); + } + + caffe2::NetBase* net = workspace->CreateNet(net_def); + CHECK_NOTNULL(net); + LOG(INFO) << "Starting benchmark."; caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup); LOG(INFO) << "Running warmup runs."; @@ -377,35 +376,6 @@ void writeOutput( } } -void logBenchmarkResult( - const std::string& type, - const std::string& metric, - const std::string& unit, - const int value) { - LOG(INFO) << caffe2::NetObserverReporterPrint::IDENTIFIER << "{" - << "\"type\": \"" << type << "\", " - << "\"metric\": \"" << metric << "\", " - << "\"unit\": \"" << unit << "\", " - << "\"value\": " << c10::to_string(value) << "}\n"; -} - -long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory) { - if (FLAGS_measure_memory) { -#if defined(TARGET_OS_IPHONE) || \ -defined(TARGET_OS_MAC) || \ -defined(TARGET_IPHONE_SIMULATOR) - malloc_statistics_t stats = {0}; - malloc_zone_statistics(nullptr, &stats); - return stats.size_allocated; -#else - struct mallinfo info = mallinfo(); - return info.uordblks; -#endif - } - - return 0; -} - int benchmark( int argc, char* argv[], @@ -416,7 +386,6 @@ int benchmark( const string& FLAGS_input_file, const string& FLAGS_input_type, int FLAGS_iter, - bool FLAGS_measure_memory, const string& FLAGS_net, const string& FLAGS_output, const string& FLAGS_output_folder, @@ -454,15 +423,19 @@ int benchmark( auto workspace = std::make_shared(new caffe2::Workspace()); bool run_on_gpu = backendCudaSet(FLAGS_backend); - // Run initialization network, measure resources used. - long init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory); + // Run initialization network. caffe2::NetDef init_net_def; CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def)); setOperatorEngine(&init_net_def, FLAGS_backend); CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def)); - init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory) - init_vmem; + + // Run main network. + caffe2::NetDef net_def; + CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def)); + setOperatorEngine(&net_def, FLAGS_backend); map tensor_protos_map; + int num_blobs = loadInput( workspace, run_on_gpu, @@ -472,19 +445,9 @@ int benchmark( FLAGS_input_dims, FLAGS_input_type); - // Run main network. - long predict_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory); - caffe2::NetDef net_def; - CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def)); - setOperatorEngine(&net_def, FLAGS_backend); - if (!net_def.has_name()) { - net_def.set_name("benchmark"); - } - caffe2::NetBase* net = workspace->CreateNet(net_def); - CHECK_NOTNULL(net); runNetwork( workspace, - net, + net_def, tensor_protos_map, FLAGS_wipe_cache, FLAGS_run_individual, @@ -498,12 +461,6 @@ int benchmark( FLAGS_sleep_between_net_and_operator, FLAGS_output, FLAGS_output_folder); - predict_vmem = getVirtualMemoryIfOptionEnabled( - FLAGS_measure_memory) - predict_vmem; - if (FLAGS_measure_memory) { - logBenchmarkResult( - "NET_", "memory", "kB", (init_vmem + predict_vmem) / 1024); - } return 0; } diff --git a/binaries/benchmark_helper.h b/binaries/benchmark_helper.h index f5630c112d1..d09ba4e9c55 100644 --- a/binaries/benchmark_helper.h +++ b/binaries/benchmark_helper.h @@ -125,7 +125,7 @@ void writeOutput( const int num_blobs); void runNetwork( shared_ptr workspace, - caffe2::NetBase* net, + caffe2::NetDef& net_def, map& tensor_protos_map, const bool wipe_cache, const bool run_individual, @@ -149,7 +149,6 @@ int benchmark( const string& FLAGS_input_file, const string& FLAGS_input_type, int FLAGS_iter, - bool FLAGS_measure_memory, const string& FLAGS_net, const string& FLAGS_output, const string& FLAGS_output_folder, diff --git a/binaries/caffe2_benchmark.cc b/binaries/caffe2_benchmark.cc index 19ef223bdea..57bd98d4972 100644 --- a/binaries/caffe2_benchmark.cc +++ b/binaries/caffe2_benchmark.cc @@ -22,7 +22,6 @@ int main(int argc, char** argv) { FLAGS_input_file, FLAGS_input_type, FLAGS_iter, - FLAGS_measure_memory, FLAGS_net, FLAGS_output, FLAGS_output_folder, diff --git a/modules/observers/net_observer_reporter.h b/modules/observers/net_observer_reporter.h index bfccef64cee..5619b69a636 100644 --- a/modules/observers/net_observer_reporter.h +++ b/modules/observers/net_observer_reporter.h @@ -19,7 +19,6 @@ struct PerformanceInformation { std::string type = ""; // the type of the operator // Measured double latency = 0; - double cpuMilliseconds = 0; }; class CAFFE2_OBSERVER_API NetObserverReporter { diff --git a/modules/observers/net_observer_reporter_print.cc b/modules/observers/net_observer_reporter_print.cc index 6baa7e01728..e00fe8c1fcc 100644 --- a/modules/observers/net_observer_reporter_print.cc +++ b/modules/observers/net_observer_reporter_print.cc @@ -1,6 +1,5 @@ #include "observers/net_observer_reporter_print.h" -#include #include #include "caffe2/core/init.h" #include "observers/observer_config.h" @@ -10,7 +9,6 @@ namespace caffe2 { const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer "; static std::string get_op_args(PerformanceInformation p); static std::string get_tensor_shapes(PerformanceInformation p); -static std::string sanatize(std::string json_s); void NetObserverReporterPrint::report( NetBase* net, @@ -25,49 +23,29 @@ void NetObserverReporterPrint::report( {"value", c10::to_string(p.second.latency * 1000)}, {"unit", "us"}, {"metric", "latency"}}); - caffe2_perf.push_back({{"type", "NET_"}, - { - "value", - c10::to_string( - p.second.cpuMilliseconds / - p.second.latency * - 100), - }, - {"unit", "percent"}, - {"metric", "cpu_percent"}}); } else if (p.first != "NET_DELAY") { // for operator perf std::string shape_str = get_tensor_shapes(p.second); std::string args_str = get_op_args(p.second); - std::string type = p.first; - caffe2_perf.push_back({{"type", type}, + + caffe2_perf.push_back({{"type", p.first}, {"value", c10::to_string(p.second.latency * 1000)}, {"unit", "us"}, {"metric", "latency"}}); - caffe2_perf.push_back({{"type", type}, - { - "value", - c10::to_string( - p.second.cpuMilliseconds / - p.second.latency * - 100), - }, - {"unit", "percent"}, - {"metric", "cpu_percent"}}); if (p.second.flops > 0) { - caffe2_perf.push_back({{"type", type}, + caffe2_perf.push_back({{"type", p.first}, {"value", c10::to_string(p.second.flops)}, {"unit", "flop"}, {"metric", "flops"}}); } if (shape_str != "") { - caffe2_perf.push_back({{"type", type}, + caffe2_perf.push_back({{"type", p.first}, {"info_string", shape_str}, {"unit", ""}, {"metric", "tensor_shapes"}}); } if (args_str != "") { - caffe2_perf.push_back({{"type", type}, + caffe2_perf.push_back({{"type", p.first}, {"info_string", args_str}, {"unit", ""}, {"metric", "op_args"}}); @@ -79,13 +57,13 @@ void NetObserverReporterPrint::report( std::stringstream buffer; auto entry = *it; buffer << IDENTIFIER << "{"; - buffer << "\"type\": \"" << sanatize(entry["type"]) << "\"," - << "\"unit\": \"" << sanatize(entry["unit"]) << "\"," - << "\"metric\": \"" << sanatize(entry["metric"]) << "\","; + buffer << "\"type\": \"" << entry["type"] << "\"," + << "\"unit\": \"" << entry["unit"] << "\"," + << "\"metric\": \"" << entry["metric"] << "\","; if (entry.find("value") != entry.end()) { - buffer << "\"value\": \"" << sanatize(entry["value"]) << "\""; + buffer << "\"value\": \"" << entry["value"] << "\""; } else if (entry.find("info_string") != entry.end()) { - buffer << "\"info_string\": \"" << sanatize(entry["info_string"]) << "\""; + buffer << "\"info_string\": \"" << entry["info_string"] << "\""; } buffer << "}"; LOG(INFO) << buffer.str(); @@ -139,12 +117,4 @@ static std::string get_op_args(PerformanceInformation p) { } return args_str; } - -static std::string sanatize(std::string json_s) { - // Remove illegal characters from the name that would cause json string to - // become invalid - json_s.erase(std::remove(json_s.begin(), json_s.end(), '"'), json_s.end()); - json_s.erase(std::remove(json_s.begin(), json_s.end(), '\\'), json_s.end()); - return json_s; -} } diff --git a/modules/observers/perf_observer.cc b/modules/observers/perf_observer.cc index 3264b518d3e..2de1ce65114 100644 --- a/modules/observers/perf_observer.cc +++ b/modules/observers/perf_observer.cc @@ -6,7 +6,6 @@ #endif #include -#include #include "caffe2/core/common.h" #include "caffe2/core/init.h" #include "caffe2/core/operator.h" @@ -63,17 +62,6 @@ bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) { } } // namespace -double getClockTimeMilliseconds(clockid_t clk_id) { - int result; - struct timespec tp; - result = clock_gettime(clk_id, &tp); - if (result == -1) { - return 0.0; - } else { - return tp.tv_sec * 1000.0 + tp.tv_nsec / 1000000.0; - } -} - REGISTER_CAFFE2_EARLY_INIT_FUNCTION( registerGlobalPerfNetObserverCreator, ®isterGlobalPerfNetObserverCreator, @@ -119,31 +107,30 @@ void PerfNetObserver::Start() { } } - wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC); - cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID); + if (logType_ != PerfNetObserver::NONE) { + /* Only start timer when we need to */ + timer_.Start(); + } } void PerfNetObserver::Stop() { if (logType_ == PerfNetObserver::NONE) { return; } + auto currentRunTime = timer_.MilliSeconds(); std::map info; PerformanceInformation net_perf; - net_perf.cpuMilliseconds = - getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_; - net_perf.latency = - getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_; - + net_perf.latency = currentRunTime; if (logType_ == PerfNetObserver::OPERATOR_DELAY) { const auto& operators = subject_->GetOperators(); for (int idx = 0; idx < operators.size(); ++idx) { const auto* op = operators[idx]; auto name = getObserverName(op, idx); PerformanceInformation p; - const PerfOperatorObserver* opObserver = - static_cast(observerMap_[op]); - p.latency = opObserver->getWallMilliseconds(); - p.cpuMilliseconds = opObserver->getCpuMilliseconds(); + + p.latency = static_cast(observerMap_[op]) + ->getMilliseconds(); + p.engine = op->engine(); p.type = op->type(); p.tensor_shapes = @@ -189,34 +176,30 @@ PerfOperatorObserver::PerfOperatorObserver( PerfNetObserver* netObserver) : ObserverBase(op), netObserver_(netObserver), - wallMilliseconds_(0), - cpuMilliseconds_(0) { + milliseconds_(0) { CAFFE_ENFORCE(netObserver_, "Observers can't operate outside of the net"); } PerfOperatorObserver::~PerfOperatorObserver() {} void PerfOperatorObserver::Start() { - wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC); - cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID); + /* Get the time from the start of the net minus the time spent + in previous invocations. It is the time spent on other operators. + This way, when the operator finishes, the time from the start of the net + minus the time spent in all other operators is the total time on this + operator. This is done to avoid saving a timer in each operator */ + milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_; } void PerfOperatorObserver::Stop() { /* Time from the start of the net minus the time spent on all other operators is the time spent on this operator */ - cpuMilliseconds_ = - getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_; - wallMilliseconds_ = - getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_; + milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_; tensor_shapes_ = subject_->InputTensorShapes(); } -double PerfOperatorObserver::getWallMilliseconds() const { - return wallMilliseconds_; -} - -double PerfOperatorObserver::getCpuMilliseconds() const { - return cpuMilliseconds_; +double PerfOperatorObserver::getMilliseconds() const { + return milliseconds_; } std::vector PerfOperatorObserver::getTensorShapes() const { diff --git a/modules/observers/perf_observer.h b/modules/observers/perf_observer.h index 71e1190e840..11fb870a619 100644 --- a/modules/observers/perf_observer.h +++ b/modules/observers/perf_observer.h @@ -10,13 +10,16 @@ namespace caffe2 { -double getClockTimeMilliseconds(); class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver { public: explicit PerfNetObserver(NetBase* subject_); virtual ~PerfNetObserver(); + caffe2::Timer& getTimer() { + return timer_; + } + private: void Start() override; void Stop() override; @@ -34,8 +37,7 @@ class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver { std::unordered_map*> observerMap_; - double wallMilliseconds_; - double cpuMilliseconds_; + caffe2::Timer timer_; }; class PerfOperatorObserver : public ObserverBase { @@ -43,8 +45,7 @@ class PerfOperatorObserver : public ObserverBase { PerfOperatorObserver(OperatorBase* op, PerfNetObserver* netObserver); virtual ~PerfOperatorObserver(); - double getWallMilliseconds() const; - double getCpuMilliseconds() const; + double getMilliseconds() const; std::vector getTensorShapes() const; private: @@ -59,8 +60,7 @@ class PerfOperatorObserver : public ObserverBase { // without storing inside the operator observer. Each field is memory // costly here and a raw pointer is a cheapest sholution PerfNetObserver* netObserver_; - double wallMilliseconds_; - double cpuMilliseconds_; + double milliseconds_; std::vector tensor_shapes_; }; } // namespace caffe2