#include "roi_pool_op.h"
|
|
|
|
#include <cfloat>
|
|
|
|
namespace caffe2 {

using std::max;
using std::min;

template <>
bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
  const auto& X = Input(0); // Input data to pool
  const auto& R = Input(1); // RoIs
  auto* Y = Output(0); // RoI pooled data
  auto* A = is_test_ ? nullptr : Output(1); // argmaxes

  // Each ROI is of the form [batch_index x1 y1 x2 y2]
  CAFFE_ENFORCE_EQ(R.dim32(1), 5);

  // TODO: Handle the storage_order properly to support NHWC as well.
  int batch_size = X.dim32(0);
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  int num_rois = R.dim32(0);

  Y->Resize(num_rois, channels, pooled_height_, pooled_width_);
  if (!is_test_) {
    A->Resize(Y->sizes());
  }

  const float* Xdata = X.data<float>();
  const float* rois = R.data<float>();
  float* Ydata = Y->template mutable_data<float>();
  int* argmax_data = is_test_ ? nullptr : A->template mutable_data<int>();
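  // argmax_data records, for each pooled output element, the flattened
  // (h * width + w) offset within its input channel; RoIPoolGradient uses
  // these indices to route the output gradient back to the matching
  // elements of X.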

  // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
  for (int n = 0; n < num_rois; ++n) {
    int roi_batch_id = rois[0];
    int roi_start_w = round(rois[1] * spatial_scale_);
    int roi_start_h = round(rois[2] * spatial_scale_);
    int roi_end_w = round(rois[3] * spatial_scale_);
    int roi_end_h = round(rois[4] * spatial_scale_);
    CAFFE_ENFORCE_GE(roi_batch_id, 0);
    CAFFE_ENFORCE_LT(roi_batch_id, batch_size);

    // Force malformed ROIs to be 1x1
    int roi_height = max(roi_end_h - roi_start_h + 1, 1);
    int roi_width = max(roi_end_w - roi_start_w + 1, 1);
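    // For example, with spatial_scale_ = 0.0625 (features at 1/16 of the
    // input resolution) and an ROI of [0, 32, 32, 96, 96] in input pixels,
    // roi_start_w = round(32 * 0.0625) = 2, roi_end_w = round(96 * 0.0625) = 6,
    // so roi_width = 6 - 2 + 1 = 5 feature-map columns (likewise for height).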

    const float bin_size_h =
        static_cast<float>(roi_height) / static_cast<float>(pooled_height_);
    const float bin_size_w =
        static_cast<float>(roi_width) / static_cast<float>(pooled_width_);

    const float* batch_data = Xdata + roi_batch_id * X.size_from_dim(1);

    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height_; ++ph) {
        for (int pw = 0; pw < pooled_width_; ++pw) {
          // Compute pooling region for this output unit:
          // start (included) = floor(ph * roi_height / pooled_height_)
          // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
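          // For instance, with roi_height = 5 and pooled_height_ = 2,
          // bin_size_h = 2.5: bin ph = 0 covers rows [0, 3) and bin ph = 1
          // covers rows [2, 5), so adjacent bins may share a boundary row.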
          int hstart =
              static_cast<int>(floor(static_cast<float>(ph) * bin_size_h));
          int wstart =
              static_cast<int>(floor(static_cast<float>(pw) * bin_size_w));
          int hend =
              static_cast<int>(ceil(static_cast<float>(ph + 1) * bin_size_h));
          int wend =
              static_cast<int>(ceil(static_cast<float>(pw + 1) * bin_size_w));

          // Add roi offsets and clip to input boundaries
          hstart = min(max(hstart + roi_start_h, 0), height);
          hend = min(max(hend + roi_start_h, 0), height);
          wstart = min(max(wstart + roi_start_w, 0), width);
          wend = min(max(wend + roi_start_w, 0), width);

          const int pool_index = ph * pooled_width_ + pw;

          // Define an empty pooling region to be zero
          bool is_empty = (hend <= hstart) || (wend <= wstart);
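          // Seed non-empty bins with -FLT_MAX so the first in-range element
          // always wins the max comparison below; empty bins stay at 0.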
          Ydata[pool_index] = is_empty ? 0 : -FLT_MAX;
          if (!is_test_) {
            // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
            argmax_data[pool_index] = -1;
          }

          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int index = h * width + w;
              if (batch_data[index] > Ydata[pool_index]) {
                Ydata[pool_index] = batch_data[index];
                if (!is_test_) {
                  argmax_data[pool_index] = index;
                }
              }
            }
          }
        }
      }
      // Increment all data pointers by one channel
      batch_data += X.size_from_dim(2);
      Ydata += Y->size_from_dim(2);
      if (!is_test_) {
        argmax_data += A->size_from_dim(2);
      }
    }
    // Increment ROI data pointer
    rois += R.size_from_dim(1);
  }

  return true;
}

REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);

// Input: X, rois
// Output case #1: Y, argmaxes (train mode)
// Output case #2: Y (test mode)
OPERATOR_SCHEMA(RoIPool)
    .NumInputs(2)
    .NumOutputs({1, 2})
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      ArgumentHelper helper(def);
      const StorageOrder order = StringToStorageOrder(
          helper.GetSingleArgument<string>("order", "NCHW"));
      const TensorShape& X = in[0];
      const int num_channels =
          (order == StorageOrder::NCHW ? X.dims(1) : X.dims(3));
      const TensorShape& R = in[1];
      const int num_rois = R.dims(0);
      const int pooled_height = helper.GetSingleArgument<int>("pooled_h", 1);
      const int pooled_width = helper.GetSingleArgument<int>("pooled_w", 1);
      TensorShape Y = CreateTensorShape(
          vector<int>({num_rois, num_channels, pooled_height, pooled_width}),
          X.data_type());

      bool is_test = helper.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0);
      if (!is_test) {
        TensorShape argmaxes = Y;
        argmaxes.set_data_type(TensorProto_DataType_INT32);
        return vector<TensorShape>({Y, argmaxes});
      } else {
        return vector<TensorShape>({Y});
      }
    })
    .SetDoc(R"DOC(
Carries out ROI Pooling for Faster-RCNN.
Depending on the mode, there are multiple output cases:

  Output case #1: Y, argmaxes (train mode)
  Output case #2: Y (test mode)
)DOC")
    .Arg(
        "is_test",
        "If set, run in test mode and skip computation of argmaxes, which "
        "are only needed for gradient computation. Only one output tensor "
        "is produced. (Default: false).")
    .Arg("order", "A StorageOrder string (Default: \"NCHW\").")
    .Arg("pooled_h", "The pooled output height (Default: 1).")
    .Arg("pooled_w", "The pooled output width (Default: 1).")
    .Arg(
        "spatial_scale",
        "Multiplicative spatial scale factor to translate ROI coords from "
        "their input scale to the scale used when pooling (Default: 1.0).")
    .Input(
        0,
        "X",
        "The input 4-D tensor of data. Only NCHW order is currently supported.")
    .Input(
        1,
        "rois",
        "RoIs (Regions of Interest) to pool over. Should be a 2-D tensor of "
        "shape (num_rois, 5) given as [[batch_id, x1, y1, x2, y2], ...].")
    .Output(
        0,
        "Y",
        "RoI pooled output 4-D tensor of shape "
        "(num_rois, channels, pooled_h, pooled_w).")
    .Output(
        1,
        "argmaxes",
        "Argmaxes corresponding to indices in X used for gradient computation. "
        "Only output if arg \"is_test\" is false.");

// Input: X, rois, argmaxes, dY (aka "gradOutput")
// Output: dX (aka "gradInput")
OPERATOR_SCHEMA(RoIPoolGradient).NumInputs(4).NumOutputs(1);

class GetRoIPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
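    // The gradient op takes the forward inputs X (I(0)) and rois (I(1)),
    // the forward argmaxes output (O(1)), and the gradient of Y (GO(0)),
    // and produces the gradient with respect to X (GI(0)).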
    return SingleGradientDef(
        "RoIPoolGradient",
        "",
        vector<string>{I(0), I(1), O(1), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(RoIPool, GetRoIPoolGradient);

} // namespace caffe2