Revert D16469619: Add Virtual Memory and CPU percentage computation to AIBench

Differential Revision:
D16469619

Original commit changeset: 670f3549c830

fbshipit-source-id: f55d4cda36f5e29df2df306d33a70158e5a7908b
This commit is contained in:
Michael Suo 2019-08-04 15:59:58 -07:00 committed by Facebook Github Bot
parent cbf05305c0
commit 1b1bddaab3
8 changed files with 54 additions and 152 deletions

View file

@ -50,11 +50,6 @@ C10_DEFINE_string(
"Input type when specifying the input dimension."
"The supported types are float, uint8_t.");
C10_DEFINE_int(iter, 10, "The number of iterations to run.");
C10_DEFINE_bool(
measure_memory,
false,
"Whether to measure increase in allocated memory while "
"loading and running the net.");
C10_DEFINE_string(net, "", "The given net to benchmark.");
C10_DEFINE_string(
output,

View file

@ -35,14 +35,6 @@
#include <observers/observer_config.h>
#include <observers/perf_observer.h>
#if defined(TARGET_OS_MAC) || \
defined(TARGET_OS_IPHONE) || \
defined(TARGET_IPHONE_SIMULATOR)
#include <malloc/malloc.h>
#else
#include <malloc.h>
#endif
using std::map;
using std::shared_ptr;
using std::string;
@ -243,7 +235,7 @@ void fillInputBlob(
void runNetwork(
shared_ptr<caffe2::Workspace> workspace,
caffe2::NetBase* net,
caffe2::NetDef& net_def,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const bool wipe_cache,
const bool run_individual,
@ -258,6 +250,13 @@ void runNetwork(
const std::string& output,
const std::string& output_folder) {
if (!net_def.has_name()) {
net_def.set_name("benchmark");
}
caffe2::NetBase* net = workspace->CreateNet(net_def);
CHECK_NOTNULL(net);
LOG(INFO) << "Starting benchmark.";
caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
LOG(INFO) << "Running warmup runs.";
@ -377,35 +376,6 @@ void writeOutput(
}
}
// Writes one benchmark measurement to the INFO log as a single JSON-style
// record prefixed with NetObserverReporterPrint::IDENTIFIER.
//   type   - text placed in the "type" field
//   metric - text placed in the "metric" field
//   unit   - text placed in the "unit" field
//   value  - integer placed, unquoted, in the "value" field
// The string arguments are inserted verbatim; no escaping is applied here.
void logBenchmarkResult(
    const std::string& type,
    const std::string& metric,
    const std::string& unit,
    const int value) {
  // Assemble the record first so the log statement itself stays short.
  std::string record = "{";
  record += "\"type\": \"" + type + "\", ";
  record += "\"metric\": \"" + metric + "\", ";
  record += "\"unit\": \"" + unit + "\", ";
  record += "\"value\": " + c10::to_string(value) + "}\n";
  LOG(INFO) << caffe2::NetObserverReporterPrint::IDENTIFIER << record;
}
// Returns the number of bytes the allocator currently reports as allocated,
// or 0 when measurement is disabled.
//   FLAGS_measure_memory - when false the allocator is not queried at all.
// On Apple targets the figure comes from malloc_zone_statistics(); elsewhere
// it comes from the C library's mallinfo family.
long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory) {
  if (!FLAGS_measure_memory) {
    return 0;
  }
#if defined(TARGET_OS_IPHONE) || \
    defined(TARGET_OS_MAC) || \
    defined(TARGET_IPHONE_SIMULATOR)
  malloc_statistics_t stats = {0};
  // A null zone requests statistics aggregated over all malloc zones.
  malloc_zone_statistics(nullptr, &stats);
  return static_cast<long>(stats.size_allocated);
#elif defined(__GLIBC__) && \
    (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33))
  // glibc's mallinfo() reports through `int` fields, which wrap once more
  // than 2 GiB is allocated; mallinfo2() (glibc >= 2.33) uses size_t fields.
  const struct mallinfo2 info = mallinfo2();
  return static_cast<long>(info.uordblks);
#else
  // Older glibc and other C libraries: only mallinfo() is available.
  const struct mallinfo info = mallinfo();
  return info.uordblks;
#endif
}
int benchmark(
int argc,
char* argv[],
@ -416,7 +386,6 @@ int benchmark(
const string& FLAGS_input_file,
const string& FLAGS_input_type,
int FLAGS_iter,
bool FLAGS_measure_memory,
const string& FLAGS_net,
const string& FLAGS_output,
const string& FLAGS_output_folder,
@ -454,15 +423,19 @@ int benchmark(
auto workspace = std::make_shared<caffe2::Workspace>(new caffe2::Workspace());
bool run_on_gpu = backendCudaSet(FLAGS_backend);
// Run initialization network, measure resources used.
long init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
// Run initialization network.
caffe2::NetDef init_net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
setOperatorEngine(&init_net_def, FLAGS_backend);
CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));
init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory) - init_vmem;
// Run main network.
caffe2::NetDef net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
setOperatorEngine(&net_def, FLAGS_backend);
map<string, caffe2::TensorProtos> tensor_protos_map;
int num_blobs = loadInput(
workspace,
run_on_gpu,
@ -472,19 +445,9 @@ int benchmark(
FLAGS_input_dims,
FLAGS_input_type);
// Run main network.
long predict_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
caffe2::NetDef net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
setOperatorEngine(&net_def, FLAGS_backend);
if (!net_def.has_name()) {
net_def.set_name("benchmark");
}
caffe2::NetBase* net = workspace->CreateNet(net_def);
CHECK_NOTNULL(net);
runNetwork(
workspace,
net,
net_def,
tensor_protos_map,
FLAGS_wipe_cache,
FLAGS_run_individual,
@ -498,12 +461,6 @@ int benchmark(
FLAGS_sleep_between_net_and_operator,
FLAGS_output,
FLAGS_output_folder);
predict_vmem = getVirtualMemoryIfOptionEnabled(
FLAGS_measure_memory) - predict_vmem;
if (FLAGS_measure_memory) {
logBenchmarkResult(
"NET_", "memory", "kB", (init_vmem + predict_vmem) / 1024);
}
return 0;
}

View file

@ -125,7 +125,7 @@ void writeOutput(
const int num_blobs);
void runNetwork(
shared_ptr<caffe2::Workspace> workspace,
caffe2::NetBase* net,
caffe2::NetDef& net_def,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const bool wipe_cache,
const bool run_individual,
@ -149,7 +149,6 @@ int benchmark(
const string& FLAGS_input_file,
const string& FLAGS_input_type,
int FLAGS_iter,
bool FLAGS_measure_memory,
const string& FLAGS_net,
const string& FLAGS_output,
const string& FLAGS_output_folder,

View file

@ -22,7 +22,6 @@ int main(int argc, char** argv) {
FLAGS_input_file,
FLAGS_input_type,
FLAGS_iter,
FLAGS_measure_memory,
FLAGS_net,
FLAGS_output,
FLAGS_output_folder,

View file

@ -19,7 +19,6 @@ struct PerformanceInformation {
std::string type = ""; // the type of the operator
// Measured
double latency = 0;
double cpuMilliseconds = 0;
};
class CAFFE2_OBSERVER_API NetObserverReporter {

View file

@ -1,6 +1,5 @@
#include "observers/net_observer_reporter_print.h"
#include <algorithm>
#include <sstream>
#include "caffe2/core/init.h"
#include "observers/observer_config.h"
@ -10,7 +9,6 @@ namespace caffe2 {
const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer ";
static std::string get_op_args(PerformanceInformation p);
static std::string get_tensor_shapes(PerformanceInformation p);
static std::string sanatize(std::string json_s);
void NetObserverReporterPrint::report(
NetBase* net,
@ -25,49 +23,29 @@ void NetObserverReporterPrint::report(
{"value", c10::to_string(p.second.latency * 1000)},
{"unit", "us"},
{"metric", "latency"}});
caffe2_perf.push_back({{"type", "NET_"},
{
"value",
c10::to_string(
p.second.cpuMilliseconds /
p.second.latency *
100),
},
{"unit", "percent"},
{"metric", "cpu_percent"}});
} else if (p.first != "NET_DELAY") {
// for operator perf
std::string shape_str = get_tensor_shapes(p.second);
std::string args_str = get_op_args(p.second);
std::string type = p.first;
caffe2_perf.push_back({{"type", type},
caffe2_perf.push_back({{"type", p.first},
{"value", c10::to_string(p.second.latency * 1000)},
{"unit", "us"},
{"metric", "latency"}});
caffe2_perf.push_back({{"type", type},
{
"value",
c10::to_string(
p.second.cpuMilliseconds /
p.second.latency *
100),
},
{"unit", "percent"},
{"metric", "cpu_percent"}});
if (p.second.flops > 0) {
caffe2_perf.push_back({{"type", type},
caffe2_perf.push_back({{"type", p.first},
{"value", c10::to_string(p.second.flops)},
{"unit", "flop"},
{"metric", "flops"}});
}
if (shape_str != "") {
caffe2_perf.push_back({{"type", type},
caffe2_perf.push_back({{"type", p.first},
{"info_string", shape_str},
{"unit", ""},
{"metric", "tensor_shapes"}});
}
if (args_str != "") {
caffe2_perf.push_back({{"type", type},
caffe2_perf.push_back({{"type", p.first},
{"info_string", args_str},
{"unit", ""},
{"metric", "op_args"}});
@ -79,13 +57,13 @@ void NetObserverReporterPrint::report(
std::stringstream buffer;
auto entry = *it;
buffer << IDENTIFIER << "{";
buffer << "\"type\": \"" << sanatize(entry["type"]) << "\","
<< "\"unit\": \"" << sanatize(entry["unit"]) << "\","
<< "\"metric\": \"" << sanatize(entry["metric"]) << "\",";
buffer << "\"type\": \"" << entry["type"] << "\","
<< "\"unit\": \"" << entry["unit"] << "\","
<< "\"metric\": \"" << entry["metric"] << "\",";
if (entry.find("value") != entry.end()) {
buffer << "\"value\": \"" << sanatize(entry["value"]) << "\"";
buffer << "\"value\": \"" << entry["value"] << "\"";
} else if (entry.find("info_string") != entry.end()) {
buffer << "\"info_string\": \"" << sanatize(entry["info_string"]) << "\"";
buffer << "\"info_string\": \"" << entry["info_string"] << "\"";
}
buffer << "}";
LOG(INFO) << buffer.str();
@ -139,12 +117,4 @@ static std::string get_op_args(PerformanceInformation p) {
}
return args_str;
}
// Strips characters that may not appear unescaped inside a JSON string
// literal, so the value can be placed between double quotes as-is.
//   json_s - text to clean (taken by value and modified in place)
// Returns the text with every double quote, backslash, and ASCII control
// character (below 0x20, e.g. newline or tab) removed. Bytes >= 0x80 are
// kept, so multi-byte UTF-8 sequences pass through untouched.
static std::string sanatize(std::string json_s) {
  json_s.erase(
      std::remove_if(
          json_s.begin(),
          json_s.end(),
          // unsigned char keeps high-bit bytes from comparing as negative.
          [](unsigned char c) { return c == '"' || c == '\\' || c < 0x20; }),
      json_s.end());
  return json_s;
}
}

View file

@ -6,7 +6,6 @@
#endif
#include <random>
#include <time.h>
#include "caffe2/core/common.h"
#include "caffe2/core/init.h"
#include "caffe2/core/operator.h"
@ -63,17 +62,6 @@ bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) {
}
} // namespace
// Reads the clock identified by clk_id and returns its current value in
// milliseconds (seconds and nanoseconds combined into one double).
// Returns 0.0 when clock_gettime() reports failure.
double getClockTimeMilliseconds(clockid_t clk_id) {
  struct timespec now;
  if (clock_gettime(clk_id, &now) == -1) {
    return 0.0;
  }
  return now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;
}
REGISTER_CAFFE2_EARLY_INIT_FUNCTION(
registerGlobalPerfNetObserverCreator,
&registerGlobalPerfNetObserverCreator,
@ -119,31 +107,30 @@ void PerfNetObserver::Start() {
}
}
wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC);
cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID);
if (logType_ != PerfNetObserver::NONE) {
/* Only start timer when we need to */
timer_.Start();
}
}
void PerfNetObserver::Stop() {
if (logType_ == PerfNetObserver::NONE) {
return;
}
auto currentRunTime = timer_.MilliSeconds();
std::map<std::string, PerformanceInformation> info;
PerformanceInformation net_perf;
net_perf.cpuMilliseconds =
getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_;
net_perf.latency =
getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_;
net_perf.latency = currentRunTime;
if (logType_ == PerfNetObserver::OPERATOR_DELAY) {
const auto& operators = subject_->GetOperators();
for (int idx = 0; idx < operators.size(); ++idx) {
const auto* op = operators[idx];
auto name = getObserverName(op, idx);
PerformanceInformation p;
const PerfOperatorObserver* opObserver =
static_cast<const PerfOperatorObserver*>(observerMap_[op]);
p.latency = opObserver->getWallMilliseconds();
p.cpuMilliseconds = opObserver->getCpuMilliseconds();
p.latency = static_cast<const PerfOperatorObserver*>(observerMap_[op])
->getMilliseconds();
p.engine = op->engine();
p.type = op->type();
p.tensor_shapes =
@ -189,34 +176,30 @@ PerfOperatorObserver::PerfOperatorObserver(
PerfNetObserver* netObserver)
: ObserverBase<OperatorBase>(op),
netObserver_(netObserver),
wallMilliseconds_(0),
cpuMilliseconds_(0) {
milliseconds_(0) {
CAFFE_ENFORCE(netObserver_, "Observers can't operate outside of the net");
}
PerfOperatorObserver::~PerfOperatorObserver() {}
void PerfOperatorObserver::Start() {
wallMilliseconds_ = getClockTimeMilliseconds(CLOCK_MONOTONIC);
cpuMilliseconds_ = getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID);
/* Get the time from the start of the net minus the time spent
in previous invocations. It is the time spent on other operators.
This way, when the operator finishes, the time from the start of the net
minus the time spent in all other operators is the total time on this
operator. This is done to avoid saving a timer in each operator */
milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
}
void PerfOperatorObserver::Stop() {
/* Time from the start of the net minus the time spent on all other
operators is the time spent on this operator */
cpuMilliseconds_ =
getClockTimeMilliseconds(CLOCK_PROCESS_CPUTIME_ID) - cpuMilliseconds_;
wallMilliseconds_ =
getClockTimeMilliseconds(CLOCK_MONOTONIC) - wallMilliseconds_;
milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
tensor_shapes_ = subject_->InputTensorShapes();
}
double PerfOperatorObserver::getWallMilliseconds() const {
return wallMilliseconds_;
}
double PerfOperatorObserver::getCpuMilliseconds() const {
return cpuMilliseconds_;
double PerfOperatorObserver::getMilliseconds() const {
return milliseconds_;
}
std::vector<TensorShape> PerfOperatorObserver::getTensorShapes() const {

View file

@ -10,13 +10,16 @@
namespace caffe2 {
double getClockTimeMilliseconds();
class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver {
public:
explicit PerfNetObserver(NetBase* subject_);
virtual ~PerfNetObserver();
// Exposes this net observer's timer by mutable reference.
// PerfOperatorObserver calls getTimer().MilliSeconds() to derive its
// per-operator timings from the net-level timer.
caffe2::Timer& getTimer() {
return timer_;
}
private:
void Start() override;
void Stop() override;
@ -34,8 +37,7 @@ class CAFFE2_OBSERVER_API PerfNetObserver : public NetObserver {
std::unordered_map<const OperatorBase*, const ObserverBase<OperatorBase>*>
observerMap_;
double wallMilliseconds_;
double cpuMilliseconds_;
caffe2::Timer timer_;
};
class PerfOperatorObserver : public ObserverBase<OperatorBase> {
@ -43,8 +45,7 @@ class PerfOperatorObserver : public ObserverBase<OperatorBase> {
PerfOperatorObserver(OperatorBase* op, PerfNetObserver* netObserver);
virtual ~PerfOperatorObserver();
double getWallMilliseconds() const;
double getCpuMilliseconds() const;
double getMilliseconds() const;
std::vector<TensorShape> getTensorShapes() const;
private:
@ -59,8 +60,7 @@ class PerfOperatorObserver : public ObserverBase<OperatorBase> {
// without storing inside the operator observer. Each field is memory
// costly here and a raw pointer is the cheapest solution
PerfNetObserver* netObserver_;
double wallMilliseconds_;
double cpuMilliseconds_;
double milliseconds_;
std::vector<TensorShape> tensor_shapes_;
};
} // namespace caffe2