mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: The GoogleTest `TEST` macro, as well as `DEFINE_DISPATCH`, is non-compliant with the `cppcoreguidelines-avoid-non-const-global-variables` check. All changes except the ones to `.clang-tidy` were generated using the following script: ``` for i in `find . -type f -iname "*.c*" -or -iname "*.h"|xargs grep cppcoreguidelines-avoid-non-const-global-variables|cut -f1 -d:|sort|uniq`; do sed -i "/\/\/ NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)/d" $i; done ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/62008 Reviewed By: driazati, r-barnes Differential Revision: D29838584 Pulled By: malfet fbshipit-source-id: 1b2f8602c945bd4ce50a9bfdd204755556e31d13
349 lines
9.9 KiB
C++
349 lines
9.9 KiB
C++
#include <chrono>
|
|
#include <vector>
|
|
#include "caffe2/core/operator.h"
|
|
#include "caffe2/core/stats.h"
|
|
#include "caffe2/core/tensor.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
// Operator that constructs a new StatRegistry and stores it, wrapped in a
// std::unique_ptr, in output blob 0.
class StatRegistryCreateOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit StatRegistryCreateOp(Args&&... args)
      : Operator(std::forward<Args>(args)...) {}

  // Builds the registry first, then move-assigns it into the output blob so
  // that the blob's unique_ptr becomes the owner. Always returns true.
  bool RunOnDevice() override {
    // NOLINTNEXTLINE(modernize-make-unique)
    std::unique_ptr<StatRegistry> fresh_registry(new StatRegistry);
    *OperatorBase::Output<std::unique_ptr<StatRegistry>>(0) =
        std::move(fresh_registry);
    return true;
  }
};
|
|
|
|
// Operator that publishes the contents of a StatRegistry into three parallel
// 1D output tensors:
//   output 0 -- keys       (std::string)
//   output 1 -- values     (int64_t)
//   output 2 -- timestamps (int64_t, each stat's `ts.time_since_epoch()`
//                           converted to nanoseconds)
// The registry is taken from input 0 when an input is present; otherwise the
// global StatRegistry::get() instance is used. The boolean argument "reset"
// (default true) is forwarded to StatRegistry::publish().
class StatRegistryExportOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit StatRegistryExportOp(Args&&... args)
      : Operator(std::forward<Args>(args)...),
        reset_(GetSingleArgument<bool>("reset", true)) {}

  // Publishes the registry and copies the result into the outputs. Always
  // returns true.
  bool RunOnDevice() override {
    auto registry = InputSize() > 0
        ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(0).get()
        : &StatRegistry::get();
    auto* keys = Output(0);
    auto* values = Output(1);
    auto* timestamps = Output(2);

    // `data` is a local, non-const snapshot, so its elements may be consumed.
    auto data = registry->publish(reset_);
    keys->Resize(data.size());
    values->Resize(data.size());
    timestamps->Resize(data.size());

    auto* pkeys = keys->template mutable_data<std::string>();
    auto* pvals = values->template mutable_data<int64_t>();
    auto* ptimestamps = timestamps->template mutable_data<int64_t>();

    int64_t i = 0;
    // Iterate by mutable reference: moving from a const element would silently
    // degrade to a copy of every key string.
    for (auto& stat : data) {
      pkeys[i] = std::move(stat.key);
      pvals[i] = stat.value;
      ptimestamps[i] =
          std::chrono::nanoseconds(stat.ts.time_since_epoch()).count();
      ++i;
    }
    return true;
  }

 private:
  // Value of the "reset" argument, passed through to publish().
  bool reset_;
};
|
|
|
|
// Operator that feeds (key, value) pairs into a StatRegistry.
//   input 0 -- keys   (tensor of std::string)
//   input 1 -- values (tensor of int64_t, same element count as keys)
//   input 2 -- optional registry handle; when exactly three inputs are given
//              that registry is updated, otherwise the global one is.
class StatRegistryUpdateOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit StatRegistryUpdateOp(Args&&... args)
      : Operator(std::forward<Args>(args)...) {}

  // Builds an ExportedStatList from the two input tensors and hands it to
  // StatRegistry::update(). Always returns true.
  bool RunOnDevice() override {
    const auto& key_tensor = Input(0);
    const auto& value_tensor = Input(1);

    StatRegistry* target = nullptr;
    if (InputSize() == 3) {
      target = OperatorBase::Input<std::unique_ptr<StatRegistry>>(2).get();
    } else {
      target = &StatRegistry::get();
    }

    CAFFE_ENFORCE_EQ(key_tensor.numel(), value_tensor.numel());

    ExportedStatList entries(key_tensor.numel());
    const auto* next_key = key_tensor.data<std::string>();
    const auto* next_value = value_tensor.data<int64_t>();
    // Walk both input buffers in lock-step with the list being filled.
    for (auto& entry : entries) {
      entry.key = *next_key;
      entry.value = *next_value;
      ++next_key;
      ++next_value;
    }

    target->update(entries);
    return true;
  }
};
|
|
|
|
class TimerInstance {
|
|
public:
|
|
explicit TimerInstance(const std::string& name)
|
|
: running_(false), stat_(name) {}
|
|
|
|
void begin() {
|
|
CAFFE_ENFORCE(!running_, "Called TimerBegin on an already running timer.");
|
|
running_ = true;
|
|
start_ = std::chrono::high_resolution_clock::now();
|
|
}
|
|
|
|
void end() {
|
|
CAFFE_ENFORCE(running_, "Called TimerEnd on a stopped timer.");
|
|
using namespace std::chrono;
|
|
auto duration = high_resolution_clock::now() - start_;
|
|
auto nanos = duration_cast<nanoseconds>(duration).count();
|
|
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
|
|
CAFFE_EVENT(stat_, time_ns, nanos);
|
|
running_ = false;
|
|
}
|
|
|
|
int64_t get_ns() {
|
|
CAFFE_ENFORCE(running_, "Called TimerGet on a stopped timer.");
|
|
using namespace std::chrono;
|
|
auto duration = high_resolution_clock::now() - start_;
|
|
auto nanos = duration_cast<nanoseconds>(duration).count();
|
|
return nanos;
|
|
}
|
|
|
|
private:
|
|
bool running_;
|
|
std::chrono::high_resolution_clock::time_point start_;
|
|
|
|
struct TimerStat {
|
|
// NOLINTNEXTLINE(modernize-pass-by-value)
|
|
CAFFE_STAT_CTOR(TimerStat);
|
|
CAFFE_AVG_EXPORTED_STAT(time_ns);
|
|
} stat_;
|
|
};
|
|
|
|
// Operator that owns a TimerInstance, publishes a pointer to it in output
// blob 0 and starts it. The timer name comes from the "counter_name"
// argument, defaulting to the name of the operator's first output.
struct TimerBeginOp : public Operator<CPUContext> {
  explicit TimerBeginOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator(operator_def, ws),
        given_name_(GetSingleArgument<std::string>(
            "counter_name",
            operator_def.output().Get(0))),
        // given_name_ is declared before timer_, so it is already initialized
        // when the timer is constructed from it.
        timer_(given_name_) {}

  // Exposes the timer through output 0, then starts it. Always returns true.
  bool RunOnDevice() override {
    TimerInstance** handle = OperatorBase::Output<TimerInstance*>(0);
    *handle = &timer_;
    timer_.begin();
    return true;
  }

 private:
  const std::string given_name_;
  TimerInstance timer_;
};
|
|
|
|
// Operator that stops the TimerInstance whose pointer is held in input blob 0.
struct TimerEndOp : public Operator<CPUContext> {
  template <class... Args>
  explicit TimerEndOp(Args&&... args) : Operator(std::forward<Args>(args)...) {}

  // Calls end() on the referenced timer. Always returns true.
  bool RunOnDevice() override {
    TimerInstance* timer = OperatorBase::Input<TimerInstance*>(0);
    timer->end();
    return true;
  }
};
|
|
|
|
// Operator that reads the elapsed nanoseconds from the TimerInstance held in
// input blob 0, stops that timer, and writes the reading to output 0 as a
// one-element int64 tensor.
struct TimerGetAndEndOp : public Operator<CPUContext> {
  template <class... Args>
  explicit TimerGetAndEndOp(Args&&... args)
      : Operator(std::forward<Args>(args)...) {}

  // Reads, stops, then emits the reading. Always returns true.
  bool RunOnDevice() override {
    TimerInstance* timer = OperatorBase::Input<TimerInstance*>(0);
    // Take the reading before end(): get_ns() requires a running timer.
    const int64_t elapsed = timer->get_ns();
    timer->end();

    auto* out = Output(0);
    out->Resize(1);
    *out->template mutable_data<int64_t>() = elapsed;
    return true;
  }
};
|
|
|
|
// Operator that reads the elapsed nanoseconds from the TimerInstance held in
// input blob 0 without stopping it, and writes the reading to output 0.
struct TimerGetOp : public Operator<CPUContext> {
  template <class... Args>
  explicit TimerGetOp(Args&&... args) : Operator(std::forward<Args>(args)...) {}

  // Emits the current reading of the timer. Always returns true.
  bool RunOnDevice() override {
    TimerInstance* timer = OperatorBase::Input<TimerInstance*>(0);
    const int64_t elapsed = timer->get_ns();

    auto* out = Output(0);
    // Resize() with no dimensions, unlike TimerGetAndEndOp's Resize(1).
    out->Resize();
    *out->template mutable_data<int64_t>() = elapsed;
    return true;
  }
};
|
|
|
|
// CPU registrations: operator name -> implementing class.

// StatRegistry operators.
REGISTER_CPU_OPERATOR(StatRegistryCreate, StatRegistryCreateOp);
REGISTER_CPU_OPERATOR(StatRegistryUpdate, StatRegistryUpdateOp);
REGISTER_CPU_OPERATOR(StatRegistryExport, StatRegistryExportOp);

// Timer operators.
REGISTER_CPU_OPERATOR(TimerBegin, TimerBeginOp);
REGISTER_CPU_OPERATOR(TimerEnd, TimerEndOp);
REGISTER_CPU_OPERATOR(TimerGetAndEnd, TimerGetAndEndOp);
REGISTER_CPU_OPERATOR(TimerGet, TimerGetOp);
|
|
|
|
// Schema for StatRegistryCreate: no inputs, one output holding the registry.
OPERATOR_SCHEMA(StatRegistryCreate)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Create a StatRegistry object that will contain a map of performance counters
keyed by name. A StatRegistry is used to gather and retrieve performance
counts throughout the caffe2 codebase.
)DOC")
    .Output(0, "handle", "A Blob pointing to the newly created StatRegistry.");
|
|
|
|
// Schema for StatRegistryUpdate: keys and values are required, the registry
// handle is an optional third input; the operator has no outputs.
OPERATOR_SCHEMA(StatRegistryUpdate)
    .NumInputs(2, 3)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Update the given StatRegistry, or the global StatRegistry,
with the values of counters for the given keys.
)DOC")
    .Input(0, "keys", "1D string tensor with the key names to update.")
    .Input(1, "values", "1D int64 tensor with the values to update.")
    .Input(
        2,
        "handle",
        "If provided, update the given StatRegistry. "
        "Otherwise, update the global singleton.");
|
|
|
|
// Schema for StatRegistryExport: an optional registry handle as input and
// three outputs (keys, values, timestamps).
OPERATOR_SCHEMA(StatRegistryExport)
    .NumInputs(0, 1)
    .NumOutputs(3)
    .SetDoc(R"DOC(
Export the counters of the given StatRegistry, or of the global StatRegistry,
as tensors of keys, values and timestamps.
)DOC")
    .Input(
        0,
        "handle",
        // The trailing space keeps the two concatenated sentences apart.
        "If provided, export values from given StatRegistry. "
        "Otherwise, export values from the global singleton StatRegistry.")
    .Output(0, "keys", "1D string tensor with exported key names")
    .Output(1, "values", "1D int64 tensor with exported values")
    .Output(2, "timestamps", "The unix timestamp at counter retrieval.")
    .Arg(
        "reset",
        "(default true) Whether to atomically reset the counters afterwards.");
|
|
|
|
// Schema for TimerBegin: no inputs, one output carrying the timer pointer.
OPERATOR_SCHEMA(TimerBegin)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Start a wallclock timer, returning a scalar tensor containing a pointer to it. The timer is stopped by calling **TimerEnd**.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc

)DOC")
    .Arg(
        "counter_name",
        "(*str*): name of the timer object; if not set use output name")
    .Output(0, "timer", "(*Tensor`<ptr>`*): pointer to a timer object");
|
|
|
|
// Schema for TimerEnd: takes the timer pointer as its only input, no outputs.
OPERATOR_SCHEMA(TimerEnd)
    .NumInputs(1)
    .NumOutputs(0)
    .SetDoc(R"DOC(
Stop a timer started with **TimerBegin**. Publishes a CAFFE_EVENT.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc

)DOC")
    .Input(
        0,
        "timer",
        "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op");
|
|
|
|
// Schema for TimerGetAndEnd: takes the timer pointer, returns the reading.
// The embedded example is Python, so its comments use `#`.
OPERATOR_SCHEMA(TimerGetAndEnd)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Queries the current time of a timer in nanoseconds, then stops the timer, publishing a CAFFE_EVENT.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

timerbegin_op = core.CreateOperator(
"TimerBegin",
[],
["timer"]
)

timerget_op = core.CreateOperator(
"TimerGet",
["timer"],
["nanos"]
)

timerend_op = core.CreateOperator(
"TimerEnd",
["timer"],
[]
)

timergetandend_op = core.CreateOperator(
"TimerGetAndEnd",
["timer"],
["nanos"]
)

# Test TimerBegin/TimerGet/TimerEnd
workspace.RunOperatorOnce(timerbegin_op)
print("timer:", workspace.FetchBlob("timer"))
workspace.RunOperatorOnce(timerget_op)
print("nanos:", workspace.FetchBlob("nanos"))
workspace.RunOperatorOnce(timerend_op)


# Test TimerBegin/TimerGetAndEnd
workspace.RunOperatorOnce(timerbegin_op)
print("timer:", workspace.FetchBlob("timer"))
workspace.RunOperatorOnce(timergetandend_op)
print("nanos:", workspace.FetchBlob("nanos"))

```

**Result**

```

timer: b'timer, a C++ native class of type caffe2::TimerInstance*.'
nanos: 361140
timer: b'timer, a C++ native class of type caffe2::TimerInstance*.'
nanos: [252250]

```

</details>

)DOC")
    .Input(
        0,
        "timer",
        "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op")
    .Output(
        0,
        "nanos",
        "(*Tensor`<int64>`*): scalar tensor containing time in nanoseconds");
|
|
|
|
// Schema for TimerGet: takes the timer pointer, returns the reading.
OPERATOR_SCHEMA(TimerGet)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Queries the current time of a timer object in nanoseconds.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc

)DOC")
    .Input(
        0,
        "timer",
        "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op")
    .Output(
        0,
        "nanos",
        "(*Tensor`<int64>`*): scalar containing time in nanoseconds");
|
|
|
|
CAFFE_KNOWN_TYPE(TimerInstance*);
|
|
CAFFE_KNOWN_TYPE(std::unique_ptr<caffe2::StatRegistry>);
|
|
} // namespace caffe2
|