Revert D16469619: Add Virtual Memory and CPU percentage computation to AIBench

Differential Revision: D16469619
Original commit changeset: 670f3549c830
fbshipit-source-id: f55d4cda36f5e29df2df306d33a70158e5a7908b
2026-05-14 20:57:59 +00:00 · 2019-08-04 15:59:58 -07:00 · 2019-08-04 15:59:58 -07:00 · 1b1bddaab3
commit 1b1bddaab3
parent cbf05305c0
8 changed files with 54 additions and 152 deletions
--- a/binaries/benchmark_args.h
+++ b/binaries/benchmark_args.h
@ -50,11 +50,6 @@ C10_DEFINE_string(
    "Input type when specifying the input dimension."
    "The supported types are float, uint8_t.");
 C10_DEFINE_int(iter, 10, "The number of iterations to run.");
-C10_DEFINE_bool(
-    measure_memory,
-    false,
-    "Whether to measure increase in allocated memory while "
-    "loading and running the net.");
 C10_DEFINE_string(net, "", "The given net to benchmark.");
 C10_DEFINE_string(
    output,
--- a/binaries/benchmark_helper.cc
+++ b/binaries/benchmark_helper.cc
@ -35,14 +35,6 @@
 #include <observers/observer_config.h>
 #include <observers/perf_observer.h>

-#if defined(TARGET_OS_MAC) || \
-defined(TARGET_OS_IPHONE) || \
-defined(TARGET_IPHONE_SIMULATOR)
-#include <malloc/malloc.h>
-#else
-#include <malloc.h>
-#endif
-
 using std::map;
 using std::shared_ptr;
 using std::string;
@ -243,7 +235,7 @@ void fillInputBlob(

 void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
-    caffe2::NetBase* net,
+    caffe2::NetDef& net_def,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
@ -258,6 +250,13 @@ void runNetwork(
    const std::string& output,
    const std::string& output_folder) {

+  if (!net_def.has_name()) {
+    net_def.set_name("benchmark");
+  }
+
+  caffe2::NetBase* net = workspace->CreateNet(net_def);
+  CHECK_NOTNULL(net);
+
  LOG(INFO) << "Starting benchmark.";
  caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
  LOG(INFO) << "Running warmup runs.";
@ -377,35 +376,6 @@ void writeOutput(
  }
 }

-void logBenchmarkResult(
-    const std::string& type,
-    const std::string& metric,
-    const std::string& unit,
-    const int value) {
-  LOG(INFO) << caffe2::NetObserverReporterPrint::IDENTIFIER << "{"
-            << "\"type\": \"" << type << "\", "
-            << "\"metric\": \"" << metric << "\", "
-            << "\"unit\": \"" << unit << "\", "
-            << "\"value\": " << c10::to_string(value) << "}\n";
-}
-
-long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory) {
-  if (FLAGS_measure_memory) {
-#if defined(TARGET_OS_IPHONE) || \
-defined(TARGET_OS_MAC) || \
-defined(TARGET_IPHONE_SIMULATOR)
-    malloc_statistics_t stats = {0};
-    malloc_zone_statistics(nullptr, &stats);
-    return stats.size_allocated;
-#else
-    struct mallinfo info = mallinfo();
-    return info.uordblks;
-#endif
-  }
-
-  return 0;
-}
-
 int benchmark(
    int argc,
    char* argv[],
@ -416,7 +386,6 @@ int benchmark(
    const string& FLAGS_input_file,
    const string& FLAGS_input_type,
    int FLAGS_iter,
-    bool FLAGS_measure_memory,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
@ -454,15 +423,19 @@ int benchmark(

  auto workspace = std::make_shared<caffe2::Workspace>(new caffe2::Workspace());
  bool run_on_gpu = backendCudaSet(FLAGS_backend);
-  // Run initialization network, measure resources used.
-  long init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
+  // Run initialization network.
  caffe2::NetDef init_net_def;
  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
  setOperatorEngine(&init_net_def, FLAGS_backend);
  CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));
-  init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory) - init_vmem;
+
+  // Run main network.
+  caffe2::NetDef net_def;
+  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
+  setOperatorEngine(&net_def, FLAGS_backend);

  map<string, caffe2::TensorProtos> tensor_protos_map;
+
  int num_blobs = loadInput(
      workspace,
      run_on_gpu,
@ -472,19 +445,9 @@ int benchmark(
      FLAGS_input_dims,
      FLAGS_input_type);

-  // Run main network.
-  long predict_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
-  caffe2::NetDef net_def;
-  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
-  setOperatorEngine(&net_def, FLAGS_backend);
-  if (!net_def.has_name()) {
-    net_def.set_name("benchmark");
-  }
-  caffe2::NetBase* net = workspace->CreateNet(net_def);
-  CHECK_NOTNULL(net);
  runNetwork(
      workspace,
-      net,
+      net_def,
      tensor_protos_map,
      FLAGS_wipe_cache,
      FLAGS_run_individual,
@ -498,12 +461,6 @@ int benchmark(
      FLAGS_sleep_between_net_and_operator,
      FLAGS_output,
      FLAGS_output_folder);
-  predict_vmem = getVirtualMemoryIfOptionEnabled(
-      FLAGS_measure_memory) - predict_vmem;
-  if (FLAGS_measure_memory) {
-    logBenchmarkResult(
-        "NET_", "memory", "kB", (init_vmem + predict_vmem) / 1024);
-  }

  return 0;
 }
--- a/binaries/benchmark_helper.h
+++ b/binaries/benchmark_helper.h
@ -125,7 +125,7 @@ void writeOutput(
    const int num_blobs);
 void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
-    caffe2::NetBase* net,
+    caffe2::NetDef& net_def,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
@ -149,7 +149,6 @@ int benchmark(
    const string& FLAGS_input_file,
    const string& FLAGS_input_type,
    int FLAGS_iter,
-    bool FLAGS_measure_memory,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
--- a/binaries/caffe2_benchmark.cc
+++ b/binaries/caffe2_benchmark.cc
@ -22,7 +22,6 @@ int main(int argc, char** argv) {
      FLAGS_input_file,
      FLAGS_input_type,
      FLAGS_iter,
-      FLAGS_measure_memory,
      FLAGS_net,
      FLAGS_output,
      FLAGS_output_folder,
--- a/modules/observers/net_observer_reporter.h
+++ b/modules/observers/net_observer_reporter.h
@ -19,7 +19,6 @@ struct PerformanceInformation {
  std::string type = ""; // the type of the operator
  // Measured
  double latency = 0;
-  double cpuMilliseconds = 0;
 };

 class CAFFE2_OBSERVER_API NetObserverReporter {
--- a/modules/observers/net_observer_reporter_print.cc
+++ b/modules/observers/net_observer_reporter_print.cc
@ -1,6 +1,5 @@
 #include "observers/net_observer_reporter_print.h"

-#include <algorithm>
 #include <sstream>
 #include "caffe2/core/init.h"
 #include "observers/observer_config.h"
@ -10,7 +9,6 @@ namespace caffe2 {
 const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer ";
 static std::string get_op_args(PerformanceInformation p);
 static std::string get_tensor_shapes(PerformanceInformation p);
-static std::string sanatize(std::string json_s);

 void NetObserverReporterPrint::report(
    NetBase* net,
@ -25,49 +23,29 @@ void NetObserverReporterPrint::report(
                             {"value", c10::to_string(p.second.latency * 1000)},
                             {"unit", "us"},
                             {"metric", "latency"}});
-      caffe2_perf.push_back({{"type", "NET_"},
-                             {
-                               "value",
-                               c10::to_string(
-                                   p.second.cpuMilliseconds /
-                                   p.second.latency *
-                                   100),
-                             },
-                             {"unit", "percent"},
-                             {"metric", "cpu_percent"}});
    } else if (p.first != "NET_DELAY") {
      // for operator perf
      std::string shape_str = get_tensor_shapes(p.second);
      std::string args_str = get_op_args(p.second);
-      std::string type = p.first;
-      caffe2_perf.push_back({{"type", type},
+
+      caffe2_perf.push_back({{"type", p.first},
                             {"value", c10::to_string(p.second.latency * 1000)},
                             {"unit", "us"},
                             {"metric", "latency"}});
-      caffe2_perf.push_back({{"type", type},
-                             {
-                               "value",
-                               c10::to_string(
-                                   p.second.cpuMilliseconds /
-                                   p.second.latency *
-                                   100),
-                             },
-                             {"unit", "percent"},
-                             {"metric", "cpu_percent"}});
      if (p.second.flops > 0) {
-        caffe2_perf.push_back({{"type", type},
+        caffe2_perf.push_back({{"type", p.first},
                               {"value", c10::to_string(p.second.flops)},
                               {"unit", "flop"},
                               {"metric", "flops"}});
      }
      if (shape_str != "") {
-        caffe2_perf.push_back({{"type", type},
+        caffe2_perf.push_back({{"type", p.first},
                               {"info_string", shape_str},
                               {"unit", ""},
                               {"metric", "tensor_shapes"}});
      }
      if (args_str != "") {
-        caffe2_perf.push_back({{"type", type},
+        caffe2_perf.push_back({{"type", p.first},
                               {"info_string", args_str},
                               {"unit", ""},
                               {"metric", "op_args"}});
@ -79,13 +57,13 @@ void NetObserverReporterPrint::report(
    std::stringstream buffer;
    auto entry = *it;
    buffer << IDENTIFIER << "{";
-    buffer << "\"type\": \"" << sanatize(entry["type"]) << "\","
-           << "\"unit\": \"" << sanatize(entry["unit"]) << "\","
-           << "\"metric\": \"" << sanatize(entry["metric"]) << "\",";
+    buffer << "\"type\": \"" << entry["type"] << "\","
+           << "\"unit\": \"" << entry["unit"] << "\","
+           << "\"metric\": \"" << entry["metric"] << "\",";
    if (entry.find("value") != entry.end()) {
-      buffer << "\"value\": \"" << sanatize(entry["value"]) << "\"";
+      buffer << "\"value\": \"" << entry["value"] << "\"";
    } else if (entry.find("info_string") != entry.end()) {
-      buffer << "\"info_string\": \"" << sanatize(entry["info_string"]) << "\"";
+      buffer << "\"info_string\": \"" << entry["info_string"] << "\"";
    }
    buffer << "}";
    LOG(INFO) << buffer.str();
@ -139,12 +117,4 @@ static std::string get_op_args(PerformanceInformation p) {
  }
  return args_str;
 }
-
-static std::string sanatize(std::string json_s) {
-  // Remove illegal characters from the name that would cause json string to
-  // become invalid
-  json_s.erase(std::remove(json_s.begin(), json_s.end(), '"'), json_s.end());
-  json_s.erase(std::remove(json_s.begin(), json_s.end(), '\\'), json_s.end());
-  return json_s;
-}
 }
--- a/modules/observers/perf_observer.cc
+++ b/modules/observers/perf_observer.cc
@ -6,7 +6,6 @@
 #endif

 #include <random>
-#include <time.h>
 #include "caffe2/core/common.h"
 #include "caffe2/core/init.h"
 #include "caffe2/core/operator.h"
@ -63,17 +62,6 @@ bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) {
 }
 } // namespace

-double getClockTimeMilliseconds(clockid_t clk_id) {
-  int result;
-  struct timespec tp;
-  result = clock_gettime(clk_id, &tp);
-  if (result == -1) {
-    return 0.0;
-  } else {
-    return tp.tv_sec * 1000.0 + tp.tv_nsec / 1000000.0;
-  }
-}
-
 REGISTER_CAFFE2_EARLY_INIT_FUNCTION(
    registerGlobalPerfNetObserverCreator,
    &registerGlobalPerfNetObserverCreator,
@ -119,31 +107,30 @@ void PerfNetObserver::Start() {
    }
  }

-  wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC);
-  cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID);
+  if (logType_ != PerfNetObserver::NONE) {
+    /* Only start timer when we need to */
+    timer_.Start();
+  }
 }

 void PerfNetObserver::Stop() {
  if (logType_ == PerfNetObserver::NONE) {
    return;
  }
+  auto currentRunTime = timer_.MilliSeconds();
  std::map<std::string, PerformanceInformation> info;
  PerformanceInformation net_perf;
-  net_perf.cpuMilliseconds =
-      getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_;
-  net_perf.latency =
-      getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_;
-
+  net_perf.latency = currentRunTime;
  if (logType_ == PerfNetObserver::OPERATOR_DELAY) {
    const auto& operators = subject_->GetOperators();
    for (int idx = 0; idx < operators.size(); ++idx) {
      const auto* op = operators[idx];
      auto name = getObserverName(op, idx);
      PerformanceInformation p;
-      const PerfOperatorObserver* opObserver =
-          static_cast<const PerfOperatorObserver*>(observerMap_[op]);
-      p.latency = opObserver->getWallMilliseconds();
-      p.cpuMilliseconds = opObserver->getCpuMilliseconds();
+
+      p.latency = static_cast<const PerfOperatorObserver*>(observerMap_[op])
+                      ->getMilliseconds();
+
      p.engine = op->engine();
      p.type = op->type();
      p.tensor_shapes =
@ -189,34 +176,30 @@ PerfOperatorObserver::PerfOperatorObserver(
    PerfNetObserver* netObserver)
    : ObserverBase<OperatorBase>(op),
      netObserver_(netObserver),
-      wallMilliseconds_(0),
-      cpuMilliseconds_(0) {
+      milliseconds_(0) {
  CAFFE_ENFORCE(netObserver_, "Observers can't operate outside of the net");
 }

 PerfOperatorObserver::~PerfOperatorObserver() {}

 void PerfOperatorObserver::Start() {
-  wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC);
-  cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID);
+  /* Get the time from the start of the net minus the time spent
+     in previous invocations. It is the time spent on other operators.
+     This way, when the operator finishes, the time from the start of the net
+     minus the time spent in all other operators  is the total time on this
+     operator. This is done to avoid saving a timer in each operator */
+  milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
 }

 void PerfOperatorObserver::Stop() {
  /* Time from the start of the net minus the time spent on all other
     operators is the time spent on this operator */
-  cpuMilliseconds_ =
-      getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_;
-  wallMilliseconds_ =
-      getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_;
+  milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
  tensor_shapes_ = subject_->InputTensorShapes();
 }

-double PerfOperatorObserver::getWallMilliseconds() const {
-  return wallMilliseconds_;
-}
-
-double PerfOperatorObserver::getCpuMilliseconds() const {
-  return cpuMilliseconds_;
+double PerfOperatorObserver::getMilliseconds() const {
+  return milliseconds_;
 }

 std::vector<TensorShape> PerfOperatorObserver::getTensorShapes() const {
--- a/modules/observers/perf_observer.h
+++ b/modules/observers/perf_observer.h
@ -10,13 +10,16 @@

 namespace caffe2 {

-double getClockTimeMilliseconds();

 class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver {
 public:
  explicit PerfNetObserver(NetBase* subject_);
  virtual ~PerfNetObserver();

+  caffe2::Timer& getTimer() {
+    return timer_;
+  }
+
 private:
  void Start() override;
  void Stop() override;
@ -34,8 +37,7 @@ class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver {
  std::unordered_map<const OperatorBase*, const ObserverBase<OperatorBase>*>
      observerMap_;

-  double wallMilliseconds_;
-  double cpuMilliseconds_;
+  caffe2::Timer timer_;
 };

 class PerfOperatorObserver : public ObserverBase<OperatorBase> {
@ -43,8 +45,7 @@ class PerfOperatorObserver : public ObserverBase<OperatorBase> {
  PerfOperatorObserver(OperatorBase* op, PerfNetObserver* netObserver);
  virtual ~PerfOperatorObserver();

-  double getWallMilliseconds() const;
-  double getCpuMilliseconds() const;
+  double getMilliseconds() const;
  std::vector<TensorShape> getTensorShapes() const;

 private:
@ -59,8 +60,7 @@ class PerfOperatorObserver : public ObserverBase<OperatorBase> {
  // without storing inside the operator observer. Each field is memory
  // costly here and a raw pointer is a cheapest sholution
  PerfNetObserver* netObserver_;
-  double wallMilliseconds_;
-  double cpuMilliseconds_;
+  double milliseconds_;
  std::vector<TensorShape> tensor_shapes_;
 };
 } // namespace caffe2