have Im2ColNd support all types and allow customized padding value. (#273)

* have Im2ColNd support all types and allow customized padding value.

* only specialize the template in order NCHW.

* fix build break.

* fix build break
This commit is contained in:
Ke Zhang 2019-01-03 19:24:06 -08:00 committed by GitHub
parent 058803086d
commit 75934af896
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 108 deletions

View file

@ -20,15 +20,15 @@ Status Conv<float>::Compute(OpKernelContext* context) const {
if (kernel_shape.size() + 2 != W->Shape().NumDimensions()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "kernel_shape num_dims is not compatible with W num_dims.",
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
}
for (size_t i = 0; i < kernel_shape.size(); ++i) {
if (kernel_shape[i] != W->Shape()[i + 2]) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "kernel_shape is not compatible with W shape.",
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
}
}
@ -111,7 +111,7 @@ Status Conv<float>::Compute(OpKernelContext* context) const {
for (int image_id = 0; image_id < N; ++image_id) {
for (int group_id = 0; group_id < group_; ++group_id) {
math::Im2colNd<float, CPUMathUtil, StorageOrder::NCHW>(
math::Im2colNd<float, CPUMathUtil, StorageOrder::NCHW>()(
Xdata + group_id * X_offset,
image_shape.GetDims().data(),
col_buffer_shape.data(),

View file

@ -57,15 +57,15 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
if (kernel_shape.size() + 2 != W->Shape().NumDimensions()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "kernel_shape num_dims is not compatible with W num_dims.",
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
}
for (size_t i = 0; i < kernel_shape.size(); ++i) {
if (kernel_shape[i] != W->Shape()[i + 2]) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "kernel_shape is not compatible with W shape.",
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
" kernel_shape: ", TensorShape(kernel_shape).ToString().c_str(),
" W: ", W->Shape().ToString().c_str());
}
}
@ -135,7 +135,7 @@ Status Conv<T>::Compute(OpKernelContext* context) const {
col_buffer_data,
&CPUMathUtil::Instance());
} else {
math::Im2colNd<T, CPUMathUtil, StorageOrder::NCHW>(
math::Im2colNd<T, CPUMathUtil, StorageOrder::NCHW>()(
Xdata + group_id * X_offset,
image_shape.GetDims().data(),
col_buffer_shape.data(),

View file

@ -327,20 +327,100 @@ void Axpby(
Provider* provider);
// Function-template form of Im2colNd (declaration only), parameterized on the
// element type T, a provider class and an integer storage-order tag.
// NOTE(review): this page shows both this function form and a struct form of
// Im2colNd; presumably this is the pre-change side of the diff - confirm.
//
// The float/CPUMathUtil/NCHW definition visible further down reads
// im_shape[d + 1] and col_shape[d + 1] per spatial axis, col_shape[0] as the
// number of column channels, and entries [0, N) of kernel_shape, stride,
// dilation and pad; it ignores img_size, col_size and the provider pointer.
// accumulate_output defaults to false.
template <typename T, class Provider, int order>
void Im2colNd(
const T* data_img,
const int64_t* im_shape,
const int64_t* col_shape,
const int64_t img_size,
const int64_t col_size,
const int64_t* kernel_shape,
const int64_t* stride,
const int64_t* dilation,
const int64_t* pad,
const int64_t N,
T* data_col,
Provider* provider,
bool accumulate_output = false);
// Primary template of the Im2colNd functor. Only the call operator is declared
// here; no definition for a generic `order` is visible in this chunk. The
// partial specialization for StorageOrder::NCHW that follows provides a body.
struct Im2colNd {
// Declared only. accumulate_output defaults to false and padding_value to 0;
// the provider parameter is left unnamed.
void operator()(
const T* data_img,
const int64_t* im_shape,
const int64_t* col_shape,
const int64_t img_size,
const int64_t col_size,
const int64_t* kernel_shape,
const int64_t* stride,
const int64_t* dilation,
const int64_t* pad,
const int64_t N,
T* data_col,
Provider* /*provider*/,
bool accumulate_output = false,
T padding_value = 0);
};
// N-dimensional im2col for NCHW-ordered data, defined in the header so that it
// is usable with any element type T. With `accumulate_output` set, the same
// index walk runs in the reverse direction (col2im) and sums column entries
// into `data_col`.
template <typename T, class Provider>
struct Im2colNd<T, Provider, StorageOrder::NCHW> {
  // data_img           Source buffer: read at the image index for im2col, or
  //                    at the column index when accumulating.
  // im_shape           im_shape[d + 1] is the image extent of spatial axis d
  //                    (im_shape[0] is not read here).
  // col_shape          col_shape[0] is the number of column channels to walk;
  //                    col_shape[d + 1] is the column extent of spatial axis d.
  // img_size, col_size Unused by this specialization.
  // kernel_shape, stride, dilation, pad
  //                    Per-axis settings; only entries [0, N) are read.
  // N                  Number of spatial axes.
  // data_col           Destination buffer: written at the column index for
  //                    im2col, or accumulated into at the image index.
  // (provider)         Unused.
  // accumulate_output  false: copy image -> column. true: add column -> image,
  //                    skipping positions that fall in the padding.
  // padding_value      Value stored for padding positions in the im2col
  //                    direction; ignored when accumulating.
  //
  // Fails via ORT_ENFORCE when there is at least one column channel and any
  // col_shape[d + 1] is not positive.
  void operator()(
      const T* data_img,
      const int64_t* im_shape,
      const int64_t* col_shape,
      const int64_t /*img_size*/,
      const int64_t /*col_size*/,
      const int64_t* kernel_shape,
      const int64_t* stride,
      const int64_t* dilation,
      const int64_t* pad,
      const int64_t N,
      T* data_col,
      Provider* /*provider*/,
      bool accumulate_output = false,
      T padding_value = 0) {
    int64_t kernel_size = 1;
    for (int64_t i = 0; i < N; ++i) {
      kernel_size *= kernel_shape[i];
    }
    const int64_t channels_col = col_shape[0];

    // The odometer-style counter below only terminates once every axis has
    // wrapped, so a non-positive extent always ended in a failed ORT_ENFORCE,
    // but only after data_col/data_img had already been indexed. Validate up
    // front so the failure is raised before any buffer is touched. Nothing is
    // checked when there are no column channels, which keeps that case a no-op.
    if (channels_col > 0) {
      for (int64_t d_i = 0; d_i < N; ++d_i) {
        ORT_ENFORCE(col_shape[d_i + 1] > 0);
      }
    }

    std::vector<int64_t> d_offset(N, 0);
    std::vector<int64_t> d_iter(N, 0);
    for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
      // Loop over spatial axes in reverse order to compute a per-axis offset
      // (the kernel tap this column channel corresponds to).
      int64_t offset = c_col;
      for (int64_t d_i = N - 1; d_i >= 0; --d_i) {
        if (d_i < N - 1) {
          offset /= kernel_shape[d_i + 1];
        }
        d_offset[d_i] = offset % kernel_shape[d_i];
      }
      // Image channel for this column channel; invariant across the walk below.
      const int64_t c_im = c_col / kernel_size;
      for (bool incremented = true; incremented;) {
        // Loop over spatial axes in forward order to compute the indices in the
        // image and column, and whether the index lies in the padding.
        int64_t index_col = c_col;
        int64_t index_im = c_im;
        bool is_padding = false;
        for (int64_t d_i = 0; d_i < N; ++d_i) {
          const int64_t d = d_iter[d_i];
          const int64_t d_im =
              d * stride[d_i] - pad[d_i] + d_offset[d_i] * dilation[d_i];
          is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1];
          index_col *= col_shape[d_i + 1];
          index_col += d;
          index_im *= im_shape[d_i + 1];
          index_im += d_im;
        }
        if (!accumulate_output) {
          if (is_padding) {
            data_col[index_col] = padding_value;
          } else {
            data_col[index_col] = data_img[index_im];
          }
        } else if (!is_padding) {  // col2im
          data_col[index_im] += data_img[index_col];
        }
        // Loop over spatial axes in reverse order to choose the next index,
        // like counting. Every extent was checked to be positive above and
        // d_iter only moves within [0, d_max - 1], so no per-element check is
        // needed here.
        incremented = false;
        for (int64_t d_i = N - 1; d_i >= 0; --d_i) {
          const int64_t d_max = col_shape[d_i + 1];
          if (d_iter[d_i] == d_max - 1) {
            d_iter[d_i] = 0;
          } else {  // d_iter[d_i] < d_max - 1
            ++d_iter[d_i];
            incremented = true;
            break;
          }
        }
      }  // for (bool incremented = true; incremented;)
    }  // for (int64_t c_col = 0; c_col < channels_col; ++c_col)
  }
};
template <typename T, class Provider, int order>
void Col2imNd(

View file

@ -475,15 +475,15 @@ void GemmBatched<float, CPUMathUtil>(
}
}
// MKL will be implemented as an execution provider
////////////////////////////////////////////////////////////////////////////////
// MKL VML alternatives.
// Depending on whether we are using MKL, we will delegate the Caffe math
// functions that are VML-related to either the VML call or the Eigen
// implementation. If you are setting the flags (such as AVX) right for your CPU
// architecture, usually Eigen will deliver a throughput as fast as the VML
// functions.
////////////////////////////////////////////////////////////////////////////////
// MKL will be implemented as an execution provider
////////////////////////////////////////////////////////////////////////////////
// MKL VML alternatives.
// Depending on whether we are using MKL, we will delegate the Caffe math
// functions that are VML-related to either the VML call or the Eigen
// implementation. If you are setting the flags (such as AVX) right for your CPU
// architecture, usually Eigen will deliver a throughput as fast as the VML
// functions.
////////////////////////////////////////////////////////////////////////////////
#define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Funcname, expr) \
template <> \
@ -859,80 +859,6 @@ void Select<float, CPUMathUtil>(
y[i] = x[i * D + idx[i]];
}
}
// Ported from caffe 1.
// Float / CPUMathUtil / NCHW specialization of the function-template form of
// Im2colNd. For every column channel it walks all column positions and either
// copies the matching image element into data_col (writing 0 for positions
// that fall in the padding) or, when accumulate_output is true, adds the
// column element into data_col at the image index (col2im).
//
// im_shape[d + 1] / col_shape[d + 1] are the image / column extents of spatial
// axis d, col_shape[0] is the number of column channels, and entries [0, N) of
// kernel_shape, stride, dilation and pad are read. img_size, col_size and the
// CPUMathUtil pointer are unused.
// NOTE(review): this page also shows a templated struct with the same body;
// presumably this function is the pre-change side of the diff - confirm.
template <>
void Im2colNd<float, CPUMathUtil, StorageOrder::NCHW>(
const float* data_img,
const int64_t* im_shape,
const int64_t* col_shape,
const int64_t /* img_size*/,
const int64_t /* col_size*/,
const int64_t* kernel_shape,
const int64_t* stride,
const int64_t* dilation,
const int64_t* pad,
const int64_t N,
float* data_col,
CPUMathUtil* /* context */,
bool accumulate_output) {
// Product of the N kernel extents; used to map a column channel back to an
// image channel.
int64_t kernel_size = 1;
for (int64_t i = 0; i < N; ++i) {
kernel_size *= kernel_shape[i];
}
const int64_t channels_col = col_shape[0];
// d_offset: kernel tap per axis for the current column channel.
// d_iter: current column position per axis (an odometer-style counter).
std::vector<int64_t> d_offset(N, 0);
std::vector<int64_t> d_iter(N, 0);
for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
// Loop over spatial axes in reverse order to compute a per-axis offset.
int64_t offset = c_col;
for (int64_t d_i = N - 1; d_i >= 0; --d_i) {
if (d_i < N - 1) {
offset /= kernel_shape[d_i + 1];
}
d_offset[d_i] = offset % kernel_shape[d_i];
}
for (bool incremented = true; incremented;) {
// Loop over spatial axes in forward order to compute the indices in the
// image and column, and whether the index lies in the padding.
int64_t index_col = c_col;
int64_t index_im = c_col / kernel_size;
bool is_padding = false;
for (int64_t d_i = 0; d_i < N; ++d_i) {
const int64_t d = d_iter[d_i];
const int64_t d_im =
d * stride[d_i] - pad[d_i] + d_offset[d_i] * dilation[d_i];
is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1];
index_col *= col_shape[d_i + 1];
index_col += d;
index_im *= im_shape[d_i + 1];
index_im += d_im;
}
// im2col direction: padding positions are filled with 0, all others are
// copied from the image.
if (!accumulate_output) {
if (is_padding) {
data_col[index_col] = 0;
} else {
data_col[index_col] = data_img[index_im];
}
} else if (!is_padding) { // col2im
// Roles are swapped here: data_img is read at the column index and
// data_col is accumulated into at the image index.
data_col[index_im] += data_img[index_col];
}
// Loop over spatial axes in reverse order to choose an index,
// like counting.
incremented = false;
for (int64_t d_i = N - 1; d_i >= 0; --d_i) {
const int64_t d_max = col_shape[d_i + 1];
// This is the only extent check, and it runs after the buffers above have
// already been indexed for the current position.
ORT_ENFORCE(d_iter[d_i] < d_max);
if (d_iter[d_i] == d_max - 1) {
d_iter[d_i] = 0;
} else { // d_iter[d_i] < d_max - 1
++d_iter[d_i];
incremented = true;
break;
}
}
} // while(incremented) {
} // for (int c = 0; c < channels_col; ++c) {
}
template <>
void Col2imNd<float, CPUMathUtil, StorageOrder::NCHW>(
@ -949,7 +875,7 @@ void Col2imNd<float, CPUMathUtil, StorageOrder::NCHW>(
float* data_img,
CPUMathUtil* context) {
Set<float, CPUMathUtil>(img_size, 0, data_img, context);
Im2colNd<float, CPUMathUtil, StorageOrder::NCHW>(
Im2colNd<float, CPUMathUtil, StorageOrder::NCHW>()(
data_col,
img_shape,
col_shape,