mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-04 04:07:22 +00:00
Support trilinear sampling in Resize CPU and CUDA kernels (#5300)
This commit is contained in:
parent
e71668f92c
commit
06cd81d791
6 changed files with 580 additions and 91 deletions
|
|
@ -321,24 +321,46 @@ void UpsampleBilinear(int64_t batch_size,
|
|||
AllocatorPtr& alloc,
|
||||
GetOriginalCoordinateFunc get_original_coordinate) {
|
||||
std::vector<float> y_original;
|
||||
std::vector<float> x_original;
|
||||
y_original.reserve(output_height);
|
||||
|
||||
std::vector<float> x_original;
|
||||
x_original.reserve(output_width);
|
||||
|
||||
// For each index in the output height and output width, cache its corresponding indices in the input
|
||||
// while multiplying it with the input stride for that dimension (cache because we don't have to re-compute
|
||||
// each time we come across the output width/ output height value while iterating the output image tensor
|
||||
SafeInt<size_t> idx_buffer_size = SafeInt<size_t>(2) * sizeof(int64_t) * (output_height + output_width);
|
||||
|
||||
// For each index in the output height and output width, cache its corresponding "weights/scales" for its
|
||||
// corresponding indices in the input which proportionately indicates how much they will influence the final
|
||||
// pixel value in the output
|
||||
// (cache because we don't have to re-compute each time we come across the output width/ output height value while iterating the output image tensor
|
||||
SafeInt<size_t> scale_buffer_size = SafeInt<size_t>(2) * sizeof(float_t) * (output_height + output_width);
|
||||
|
||||
// Limit number of allocations to just 1
|
||||
auto inx_scale_data_buffer = alloc->Alloc(idx_buffer_size + scale_buffer_size);
|
||||
BufferUniquePtr idx_scale_data_buffer_holder(inx_scale_data_buffer, BufferDeleter(alloc));
|
||||
|
||||
// Get pointers to appropriate memory locations in the scratch buffer
|
||||
auto* idx_data = static_cast<int64_t*>(idx_scale_data_buffer_holder.get());
|
||||
|
||||
// input_width is the stride for the height dimension
|
||||
int64_t* input_width_mul_y1 = idx_data;
|
||||
int64_t* input_width_mul_y2 = idx_data + output_height;
|
||||
int64_t* in_x1 = idx_data + 2 * output_height;
|
||||
int64_t* in_x2 = idx_data + 2 * output_height + output_width;
|
||||
int64_t* input_width_mul_y2 = input_width_mul_y1 + output_height;
|
||||
|
||||
// stride for width is 1 (no multiplication needed)
|
||||
int64_t* in_x1 = input_width_mul_y1 + 2 * output_height;
|
||||
int64_t* in_x2 = in_x1 + output_width;
|
||||
|
||||
auto* scale_data = reinterpret_cast<float*>(in_x2 + output_width);
|
||||
float* dy1 = scale_data;
|
||||
float* dy2 = scale_data + output_height;
|
||||
float* dx1 = scale_data + 2 * output_height;
|
||||
float* dx2 = scale_data + 2 * output_height + output_width;
|
||||
|
||||
float* dy1 = scale_data;
|
||||
float* dy2 = dy1 + output_height;
|
||||
|
||||
float* dx1 = dy1 + 2 * output_height;
|
||||
float* dx2 = dx1 + output_width;
|
||||
|
||||
// Start processing
|
||||
auto roi_y_start = roi.size() / 2 - 2;
|
||||
auto roi_y_end = roi.size() - 2;
|
||||
for (int64_t y = 0; y < output_height; ++y) {
|
||||
|
|
@ -379,8 +401,8 @@ void UpsampleBilinear(int64_t batch_size,
|
|||
in_x1[x] = std::min(static_cast<int64_t>(in_x), input_width - 1);
|
||||
in_x2[x] = std::min(in_x1[x] + 1, input_width - 1);
|
||||
|
||||
dx1[x] = std::abs(in_x - in_x1[x]);
|
||||
dx2[x] = std::abs(in_x - in_x2[x]);
|
||||
dx1[x] = std::fabs(in_x - in_x1[x]);
|
||||
dx2[x] = std::fabs(in_x - in_x2[x]);
|
||||
if (in_x1[x] == in_x2[x]) {
|
||||
dx1[x] = 0.5f;
|
||||
dx2[x] = 0.5f;
|
||||
|
|
@ -400,6 +422,7 @@ void UpsampleBilinear(int64_t batch_size,
|
|||
continue;
|
||||
}
|
||||
|
||||
// subscript ordering in the variable - (xy)
|
||||
T X11 = Xdata[input_width_mul_y1[y] + in_x1[x]];
|
||||
T X21 = Xdata[input_width_mul_y1[y] + in_x2[x]];
|
||||
T X12 = Xdata[input_width_mul_y2[y] + in_x1[x]];
|
||||
|
|
@ -417,6 +440,198 @@ void UpsampleBilinear(int64_t batch_size,
|
|||
}
|
||||
}
|
||||
|
||||
// The following method supports a 5-D input in 'Linear mode'
|
||||
// that amounts to 'Trilinear' Upsampling/Resizing in the sense that it assumes
|
||||
// the scale values for the outermost 2 dimensions are 1.
|
||||
// This is the common use-case where the 5-D input (batched multi-channel volumes)
|
||||
// is usually of shape [N, C, D, H, W] and the scales are [1.0, 1.0, depth_scale, height_scale, width_scale]
|
||||
template <typename T>
|
||||
void UpsampleTrilinear(int64_t batch_size,
|
||||
int64_t num_channels,
|
||||
int64_t input_depth,
|
||||
int64_t input_height,
|
||||
int64_t input_width,
|
||||
int64_t output_depth,
|
||||
int64_t output_height,
|
||||
int64_t output_width,
|
||||
float depth_scale,
|
||||
float height_scale,
|
||||
float width_scale,
|
||||
const std::vector<float>& roi,
|
||||
bool use_extrapolation,
|
||||
float extrapolation_value,
|
||||
const T* Xdata,
|
||||
T* Ydata,
|
||||
AllocatorPtr& alloc,
|
||||
GetOriginalCoordinateFunc get_original_coordinate) {
|
||||
std::vector<float> z_original;
|
||||
z_original.reserve(output_depth);
|
||||
|
||||
std::vector<float> y_original;
|
||||
y_original.reserve(output_height);
|
||||
|
||||
std::vector<float> x_original;
|
||||
x_original.reserve(output_width);
|
||||
|
||||
// For each index in the output height and output width, cache its corresponding indices in the input
|
||||
// while multiplying it with the input stride for that dimension (cache because we don't have to re-compute
|
||||
// each time we come across the output width/ output height value while iterating the output image tensor
|
||||
SafeInt<size_t> idx_buffer_size = SafeInt<size_t>(2) * sizeof(int64_t) *
|
||||
(output_depth + output_height + output_width);
|
||||
|
||||
// For each index in the output height and output width, cache its corresponding "weights/scales" for its
|
||||
// corresponding indices in the input which proportionately indicates how much they will influence the final
|
||||
// pixel value in the output
|
||||
// (cache because we don't have to re-compute each time we come across the output width/ output height value while iterating the output image tensor
|
||||
SafeInt<size_t> scale_buffer_size = SafeInt<size_t>(2) * sizeof(float_t) *
|
||||
(output_depth + output_height + output_width);
|
||||
|
||||
// Limit number of allocations to just 1
|
||||
auto inx_scale_data_buffer = alloc->Alloc(idx_buffer_size + scale_buffer_size);
|
||||
BufferUniquePtr idx_scale_data_buffer_holder(inx_scale_data_buffer, BufferDeleter(alloc));
|
||||
|
||||
// Get pointers to appropriate memory locations in the scratch buffer
|
||||
auto* idx_data = static_cast<int64_t*>(idx_scale_data_buffer_holder.get());
|
||||
|
||||
// input_width * input_height is the stride for the depth dimension
|
||||
int64_t* input_height_width_mul_z1 = idx_data;
|
||||
int64_t* input_height_width_mul_z2 = input_height_width_mul_z1 + output_depth;
|
||||
|
||||
// input_width is the stride for the height dimension
|
||||
int64_t* input_width_mul_y1 = input_height_width_mul_z1 + 2 * output_depth;
|
||||
int64_t* input_width_mul_y2 = input_width_mul_y1 + output_height;
|
||||
|
||||
// stride for width is 1 (no multiplication needed)
|
||||
int64_t* in_x1 = input_width_mul_y1 + 2 * output_height;
|
||||
int64_t* in_x2 = in_x1 + output_width;
|
||||
|
||||
auto* scale_data = reinterpret_cast<float*>(in_x2 + output_width);
|
||||
|
||||
float* dz1 = scale_data;
|
||||
float* dz2 = dz1 + output_depth;
|
||||
|
||||
float* dy1 = dz1 + 2 * output_depth;
|
||||
float* dy2 = dy1 + output_height;
|
||||
|
||||
float* dx1 = dy1 + 2 * output_height;
|
||||
float* dx2 = dx1 + output_width;
|
||||
|
||||
// Start processing
|
||||
auto roi_z_start = roi.size() / 2 - 3;
|
||||
auto roi_z_end = roi.size() - 3;
|
||||
for (int64_t z = 0; z < output_depth; ++z) {
|
||||
float in_z = depth_scale == 1 ? static_cast<float>(z)
|
||||
: get_original_coordinate(static_cast<float>(z), depth_scale,
|
||||
static_cast<float>(output_depth), static_cast<float>(input_depth),
|
||||
roi[roi_z_start], roi[roi_z_end]);
|
||||
z_original.emplace_back(in_z);
|
||||
in_z = std::max(0.0f, std::min(in_z, static_cast<float>(input_depth - 1)));
|
||||
|
||||
const int64_t in_z1 = std::min(static_cast<int64_t>(in_z), input_depth - 1);
|
||||
const int64_t in_z2 = std::min(in_z1 + 1, input_depth - 1);
|
||||
dz1[z] = std::fabs(in_z - in_z1);
|
||||
dz2[z] = std::fabs(in_z - in_z2);
|
||||
|
||||
if (in_z1 == in_z2) {
|
||||
dz1[z] = 0.5f;
|
||||
dz2[z] = 0.5f;
|
||||
}
|
||||
|
||||
input_height_width_mul_z1[z] = input_height * input_width * in_z1;
|
||||
input_height_width_mul_z2[z] = input_height * input_width * in_z2;
|
||||
}
|
||||
|
||||
auto roi_y_start = roi.size() / 2 - 2;
|
||||
auto roi_y_end = roi.size() - 2;
|
||||
for (int64_t y = 0; y < output_height; ++y) {
|
||||
float in_y = height_scale == 1 ? static_cast<float>(y)
|
||||
: get_original_coordinate(static_cast<float>(y), height_scale,
|
||||
static_cast<float>(output_height), static_cast<float>(input_height),
|
||||
roi[roi_y_start], roi[roi_y_end]);
|
||||
y_original.emplace_back(in_y);
|
||||
in_y = std::max(0.0f, std::min(in_y, static_cast<float>(input_height - 1)));
|
||||
|
||||
const int64_t in_y1 = std::min(static_cast<int64_t>(in_y), input_height - 1);
|
||||
const int64_t in_y2 = std::min(in_y1 + 1, input_height - 1);
|
||||
dy1[y] = std::fabs(in_y - in_y1);
|
||||
dy2[y] = std::fabs(in_y - in_y2);
|
||||
|
||||
if (in_y1 == in_y2) {
|
||||
dy1[y] = 0.5f;
|
||||
dy2[y] = 0.5f;
|
||||
}
|
||||
|
||||
input_width_mul_y1[y] = input_width * in_y1;
|
||||
input_width_mul_y2[y] = input_width * in_y2;
|
||||
}
|
||||
|
||||
auto roi_x_start = roi.size() / 2 - 1;
|
||||
auto roi_x_end = roi.size() - 1;
|
||||
for (int64_t x = 0; x < output_width; ++x) {
|
||||
float in_x = width_scale == 1 ? static_cast<float>(x)
|
||||
: get_original_coordinate(static_cast<float>(x), width_scale,
|
||||
static_cast<float>(output_width), static_cast<float>(input_width),
|
||||
roi[roi_x_start], roi[roi_x_end]);
|
||||
x_original.emplace_back(in_x);
|
||||
in_x = std::max(0.0f, std::min(in_x, static_cast<float>(input_width - 1)));
|
||||
|
||||
in_x1[x] = std::min(static_cast<int64_t>(in_x), input_width - 1);
|
||||
in_x2[x] = std::min(in_x1[x] + 1, input_width - 1);
|
||||
|
||||
dx1[x] = std::fabs(in_x - in_x1[x]);
|
||||
dx2[x] = std::fabs(in_x - in_x2[x]);
|
||||
if (in_x1[x] == in_x2[x]) {
|
||||
dx1[x] = 0.5f;
|
||||
dx2[x] = 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
for (int64_t n = 0; n < batch_size; ++n) {
|
||||
for (int64_t c = 0; c < num_channels; ++c) {
|
||||
for (int64_t z = 0; z < output_depth; ++z) {
|
||||
for (int64_t y = 0; y < output_height; ++y) {
|
||||
for (int64_t x = 0; x < output_width; ++x) {
|
||||
// when use_extrapolation is set and original index of x or y is out of the dim range
|
||||
// then use extrapolation_value as the output value.
|
||||
if (use_extrapolation &&
|
||||
((z_original[z] < 0 || z_original[z] > static_cast<float>(input_depth - 1)) ||
|
||||
(y_original[y] < 0 || y_original[y] > static_cast<float>(input_height - 1)) ||
|
||||
(x_original[x] < 0 || x_original[x] > static_cast<float>(input_width - 1)))) {
|
||||
Ydata[output_width * output_height * z + output_width * y + x] =
|
||||
static_cast<T>(extrapolation_value);
|
||||
continue;
|
||||
}
|
||||
|
||||
// subscript ordering in the variable - (xyz)
|
||||
T X111 = Xdata[input_height_width_mul_z1[z] + input_width_mul_y1[y] + in_x1[x]];
|
||||
T X211 = Xdata[input_height_width_mul_z1[z] + input_width_mul_y1[y] + in_x2[x]];
|
||||
T X121 = Xdata[input_height_width_mul_z1[z] + input_width_mul_y2[y] + in_x1[x]];
|
||||
T X221 = Xdata[input_height_width_mul_z1[z] + input_width_mul_y2[y] + in_x2[x]];
|
||||
|
||||
T X112 = Xdata[input_height_width_mul_z2[z] + input_width_mul_y1[y] + in_x1[x]];
|
||||
T X212 = Xdata[input_height_width_mul_z2[z] + input_width_mul_y1[y] + in_x2[x]];
|
||||
T X122 = Xdata[input_height_width_mul_z2[z] + input_width_mul_y2[y] + in_x1[x]];
|
||||
T X222 = Xdata[input_height_width_mul_z2[z] + input_width_mul_y2[y] + in_x2[x]];
|
||||
|
||||
Ydata[output_width * output_height * z + output_width * y + x] =
|
||||
static_cast<T>(dx2[x] * dy2[y] * dz2[z] * X111 +
|
||||
dx1[x] * dy2[y] * dz2[z] * X211 +
|
||||
dx2[x] * dy1[y] * dz2[z] * X121 +
|
||||
dx1[x] * dy1[y] * dz2[z] * X221 +
|
||||
|
||||
dx2[x] * dy2[y] * dz1[z] * X112 +
|
||||
dx1[x] * dy2[y] * dz1[z] * X212 +
|
||||
dx2[x] * dy1[y] * dz1[z] * X122 +
|
||||
dx1[x] * dy1[y] * dz1[z] * X222);
|
||||
}
|
||||
}
|
||||
}
|
||||
Xdata += input_depth * input_height * input_width;
|
||||
Ydata += output_depth * output_width * output_height;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculates cubic coeff based on Robert Keys approach
|
||||
// https://ieeexplore.ieee.org/document/1163711
|
||||
std::array<float, CubicModeGridLength> GetCubicCoeffs(float s, float cubic_coeff_a = -0.75) {
|
||||
|
|
@ -491,7 +706,11 @@ void ResizeBiCubic(
|
|||
T* Ydata,
|
||||
GetOriginalCoordinateFunc get_original_coordinate) {
|
||||
std::vector<float> y_original;
|
||||
y_original.reserve(output_height);
|
||||
|
||||
std::vector<float> x_original;
|
||||
x_original.reserve(output_width);
|
||||
|
||||
std::unordered_map<float, std::array<float, CubicModeGridLength>> cubic_coeffs;
|
||||
std::unordered_map<float, std::unordered_map<int64_t, float>> coeff_to_1Dinterpolation_map;
|
||||
auto roi_y_start = roi.size() / 2 - 2;
|
||||
|
|
@ -660,33 +879,70 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context,
|
|||
scales, roi, is_resize_, use_extrapolation_, extrapolation_value_,
|
||||
use_nearest2x_optimization_, get_original_coordinate_, get_nearest_pixel_);
|
||||
case UpsampleMode::LINEAR: {
|
||||
//The correct behavior of 'linear' mode for an N-D input is not clear right now,
|
||||
//so only support 'bilinear' with 2-D or 4-D input tensor with outermost 2 scales as 1 in the 4-D case
|
||||
// Supports 'bilinear' and 'trilinear' sampling only
|
||||
|
||||
//'bilinear' == 2-D input or 4-D input with outermost 2 scales as 1
|
||||
if (dims.size() == 2 || dims.size() == 4) {
|
||||
bool is_2D = dims.size() == 2;
|
||||
|
||||
const int64_t batch_size = is_2D ? 1 : dims[0];
|
||||
const int64_t num_channels = is_2D ? 1 : dims[1];
|
||||
const int64_t input_height = is_2D ? dims[0] : dims[2];
|
||||
const int64_t input_width = is_2D ? dims[1] : dims[3];
|
||||
|
||||
const int64_t output_height = is_2D ? output_dims[0] : output_dims[2];
|
||||
const int64_t output_width = is_2D ? output_dims[1] : output_dims[3];
|
||||
|
||||
AllocatorPtr alloc;
|
||||
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
|
||||
UpsampleBilinear(batch_size, num_channels, input_height, input_width, output_height, output_width,
|
||||
is_2D ? scales[0] : scales[2], is_2D ? scales[1] : scales[3], roi,
|
||||
use_extrapolation_, extrapolation_value_, X->template Data<T>(),
|
||||
Y->template MutableData<T>(), alloc, get_original_coordinate_);
|
||||
return Status::OK();
|
||||
} else if (dims.size() == 3 || dims.size() == 5) {
|
||||
//'trilinear' == 3-D input or 5-D input with outermost 2 scales as 1
|
||||
bool is_3D = dims.size() == 3;
|
||||
|
||||
const int64_t batch_size = is_3D ? 1 : dims[0];
|
||||
const int64_t num_channels = is_3D ? 1 : dims[1];
|
||||
const int64_t input_depth = is_3D ? dims[0] : dims[2];
|
||||
const int64_t input_height = is_3D ? dims[1] : dims[3];
|
||||
const int64_t input_width = is_3D ? dims[2] : dims[4];
|
||||
|
||||
const int64_t output_depth = is_3D ? output_dims[0] : output_dims[2];
|
||||
const int64_t output_height = is_3D ? output_dims[1] : output_dims[3];
|
||||
const int64_t output_width = is_3D ? output_dims[2] : output_dims[4];
|
||||
|
||||
AllocatorPtr alloc;
|
||||
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
|
||||
UpsampleTrilinear(batch_size, num_channels, input_depth, input_height, input_width,
|
||||
output_depth, output_height, output_width,
|
||||
is_3D ? scales[0] : scales[2], is_3D ? scales[1] : scales[3],
|
||||
is_3D ? scales[2] : scales[4], roi, use_extrapolation_, extrapolation_value_,
|
||||
X->template Data<T>(), Y->template MutableData<T>(), alloc, get_original_coordinate_);
|
||||
return Status::OK();
|
||||
} else {
|
||||
// User shouldn't hit this as the check has been performed in ScalesValidation()
|
||||
std::ostringstream oss;
|
||||
oss << "'Linear' mode only support 2-D inputs or 3-D inputs ('Bilinear', 'Trilinear') "
|
||||
"or 4-D inputs or 5-D inputs with the corresponding outermost 2 scale values "
|
||||
"being 1 in the ";
|
||||
oss << (is_resize_ ? "Resize operator" : "Upsample operator");
|
||||
return Status(ONNXRUNTIME, FAIL, oss.str());
|
||||
}
|
||||
}
|
||||
case UpsampleMode::CUBIC: {
|
||||
// Supports 'bicubic' sampling only
|
||||
|
||||
// User shouldn't hit this as the check has been performed in ScalesValidation()
|
||||
if (dims.size() != 2 && dims.size() != 4) {
|
||||
std::ostringstream oss;
|
||||
oss << "'Linear' mode only support 2-D inputs ('Bilinear') or 4-D inputs "
|
||||
oss << "'Cubic' mode only support 2-D inputs ('Bicubic') or 4-D inputs "
|
||||
"with the corresponding outermost 2 scale values being 1 in the ";
|
||||
oss << (is_resize_ ? "Resize operator" : "Upsample operator");
|
||||
return Status(ONNXRUNTIME, FAIL, oss.str());
|
||||
}
|
||||
|
||||
bool is_2D = dims.size() == 2;
|
||||
const int64_t batch_size = is_2D ? 1 : dims[0];
|
||||
const int64_t num_channels = is_2D ? 1 : dims[1];
|
||||
const int64_t input_height = is_2D ? dims[0] : dims[2];
|
||||
const int64_t input_width = is_2D ? dims[1] : dims[3];
|
||||
const int64_t output_height = is_2D ? output_dims[0] : output_dims[2];
|
||||
const int64_t output_width = is_2D ? output_dims[1] : output_dims[3];
|
||||
|
||||
AllocatorPtr alloc;
|
||||
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc));
|
||||
UpsampleBilinear(batch_size, num_channels, input_height, input_width, output_height, output_width,
|
||||
is_2D ? scales[0] : scales[2], is_2D ? scales[1] : scales[3], roi,
|
||||
use_extrapolation_, extrapolation_value_, X->template Data<T>(),
|
||||
Y->template MutableData<T>(), alloc, get_original_coordinate_);
|
||||
return Status::OK();
|
||||
}
|
||||
case UpsampleMode::CUBIC: {
|
||||
bool is_2D = dims.size() == 2;
|
||||
const int64_t batch_size = is_2D ? 1 : dims[0];
|
||||
const int64_t num_channels = is_2D ? 1 : dims[1];
|
||||
|
|
|
|||
|
|
@ -276,15 +276,26 @@ class UpsampleBase {
|
|||
}
|
||||
}
|
||||
|
||||
if (UpsampleMode::LINEAR == mode || UpsampleMode::CUBIC == mode) {
|
||||
if (UpsampleMode::LINEAR == mode) {
|
||||
ORT_ENFORCE(scales.size() == 2 ||
|
||||
(scales.size() == 4 && scales[0] == 1 && scales[1] == 1) ||
|
||||
scales.size() == 3 ||
|
||||
(scales.size() == 5 && scales[0] == 1 && scales[1] == 1),
|
||||
"'Linear' mode only support 2-D inputs or 3-D inputs ('Bilinear', 'Trilinear') "
|
||||
"or 4-D inputs or 5-D inputs with the corresponding outermost 2 scale values being 1 in the ",
|
||||
is_resize_ ? "Resize operator" : "Upsample operator");
|
||||
}
|
||||
|
||||
else if (UpsampleMode::CUBIC == mode) {
|
||||
ORT_ENFORCE(scales.size() == 2 || (scales.size() == 4 && scales[0] == 1 && scales[1] == 1),
|
||||
"'Linear' mode and 'Cubic' mode only support 2-D inputs ('Bilinear', 'Bicubic') or 4-D inputs "
|
||||
"'Cubic' mode only support 2-D inputs ('Bicubic') or 4-D inputs "
|
||||
"with the corresponding outermost 2 scale values being 1 in the ",
|
||||
is_resize_ ? "Resize operator" : "Upsample operator");
|
||||
}
|
||||
}
|
||||
|
||||
void ParseScalesData(const Tensor* scale, std::vector<float>& scales) const {
|
||||
void
|
||||
ParseScalesData(const Tensor* scale, std::vector<float>& scales) const {
|
||||
const auto* scale_data = scale->template Data<float>();
|
||||
int64_t scales_size = scale->Shape().Size();
|
||||
ORT_ENFORCE(scales_size > 0, "scales size should be greater than 0.");
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ __global__ void _ResizeNearestMappingKernel(
|
|||
const TArray<int64_t> input_shape,
|
||||
const TArray<int64_t> output_shape,
|
||||
const TArray<float> scales,
|
||||
const TArray<float> roi,
|
||||
const TArray<float, 10> roi,
|
||||
const size_t total_dim_sum,
|
||||
bool extrapolation_enabled,
|
||||
CudaFunctionOriginalCoordinate transform_coordinate,
|
||||
|
|
@ -276,7 +276,7 @@ __global__ void _ResizeNearestKernel(
|
|||
output_data[id] = extrapolation_occured ? extrapolation_value : input_data[input_index];
|
||||
}
|
||||
|
||||
struct BilinearMappingInfo {
|
||||
struct LinearMappingInfo {
|
||||
int origin_;
|
||||
float weight_;
|
||||
int extrapolate_;
|
||||
|
|
@ -291,7 +291,7 @@ __global__ void _ResizeBilinearCoordinateMapping(
|
|||
float roi_width_start, float roi_width_end,
|
||||
const size_t SumHW, bool extrapolation_enabled,
|
||||
CudaFunctionOriginalCoordinate transform_coordinate,
|
||||
BilinearMappingInfo* dims_mapping) {
|
||||
LinearMappingInfo* dims_mapping) {
|
||||
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, SumHW);
|
||||
if (id < output_height) { // y = id
|
||||
float input_y = scale_height == 1 ? static_cast<float>(id) :
|
||||
|
|
@ -316,7 +316,7 @@ __global__ void _ResizeBilinearCoordinateMapping(
|
|||
}
|
||||
}
|
||||
|
||||
// The following method supports a N-D input in 'Linear mode'. Last two dimension is [H, W].
|
||||
// The following method supports a 2-D or 4-D input in 'Linear mode'. Last two dimension is [H, W].
|
||||
// the scale values for the outer dimensions except last two are 1.
|
||||
template <typename T>
|
||||
__global__ void _ResizeBilinearKernel(
|
||||
|
|
@ -325,7 +325,7 @@ __global__ void _ResizeBilinearKernel(
|
|||
fast_divmod div_output_width, fast_divmod div_output_image,
|
||||
const T* input_data, T* output_data, const size_t N,
|
||||
const T extrapolation_value,
|
||||
BilinearMappingInfo* dims_mapping) {
|
||||
LinearMappingInfo* dims_mapping) {
|
||||
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);
|
||||
int bxc, output_image_index;
|
||||
div_output_image.divmod(id, bxc, output_image_index);
|
||||
|
|
@ -359,6 +359,124 @@ __global__ void _ResizeBilinearKernel(
|
|||
x11 * static_cast<T>(y_offset_0 * x_offset_0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void _ResizeTrilinearCoordinateMapping(
|
||||
int64_t input_depth, int64_t input_height, int64_t input_width,
|
||||
int64_t output_depth, int64_t output_height, int64_t output_width,
|
||||
float scale_depth, float scale_height, float scale_width,
|
||||
float roi_depth_start, float roi_depth_end,
|
||||
float roi_height_start, float roi_height_end,
|
||||
float roi_width_start, float roi_width_end,
|
||||
const size_t SumDHW, bool extrapolation_enabled,
|
||||
CudaFunctionOriginalCoordinate transform_coordinate,
|
||||
LinearMappingInfo* dims_mapping) {
|
||||
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, SumDHW);
|
||||
if (id < output_depth) { // z = id
|
||||
float input_z = scale_depth == 1 ? static_cast<float>(id) :
|
||||
transform_coordinate(static_cast<float>(id), scale_depth,
|
||||
static_cast<float>(output_depth), static_cast<float>(input_depth),
|
||||
roi_depth_start, roi_depth_end);
|
||||
dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_z < 0 || input_z > static_cast<float>(input_depth - 1)));
|
||||
input_z = max(0.0f, min(input_z, static_cast<float>(input_depth - 1)));
|
||||
int z_int = static_cast<int>(input_z);
|
||||
dims_mapping[id].origin_ = z_int;
|
||||
dims_mapping[id].weight_ = (z_int >= input_depth - 1) ? 0.5f : input_z - z_int;
|
||||
} else if (id >= output_depth && id < (output_depth + output_height)) { // y = id - output_depth
|
||||
float input_y = scale_height == 1 ? static_cast<float>(id - output_depth) :
|
||||
transform_coordinate(static_cast<float>(id - output_depth), scale_height,
|
||||
static_cast<float>(output_height), static_cast<float>(input_height),
|
||||
roi_height_start, roi_height_end);
|
||||
|
||||
dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_y < 0 || input_y > static_cast<float>(input_height - 1)));
|
||||
input_y = max(0.0f, min(input_y, static_cast<float>(input_height - 1)));
|
||||
int y_int = static_cast<int>(input_y);
|
||||
dims_mapping[id].origin_ = y_int;
|
||||
dims_mapping[id].weight_ = (y_int >= input_height - 1) ? 0.5f : input_y - y_int;
|
||||
} else { //x = id - output_depth - output_height
|
||||
float input_x = scale_width == 1 ? static_cast<float>(id - output_depth - output_height) :
|
||||
transform_coordinate(static_cast<float>(id - output_depth - output_height), scale_width,
|
||||
static_cast<float>(output_width), static_cast<float>(input_width),
|
||||
roi_width_start, roi_width_end);
|
||||
dims_mapping[id].extrapolate_ = (int)(extrapolation_enabled && (input_x < 0 || input_x > static_cast<float>(input_width - 1)));
|
||||
input_x = max(0.0f, min(input_x, static_cast<float>(input_width - 1)));
|
||||
int x_int = static_cast<int>(input_x);
|
||||
dims_mapping[id].origin_ = x_int;
|
||||
dims_mapping[id].weight_ = (x_int >= input_width - 1) ? 0.5f : input_x - x_int;
|
||||
}
|
||||
}
|
||||
|
||||
// The following method supports a 3-D or 5-D input in 'Linear mode'. Last two dimension is [D, sH, W].
|
||||
// the scale values for the outer dimensions except last two are 1.
|
||||
template <typename T>
|
||||
__global__ void _ResizeTrilinearKernel(
|
||||
int64_t input_depth, int64_t input_height, int64_t input_width,
|
||||
int64_t output_depth, int64_t output_height, int64_t output_width,
|
||||
fast_divmod div_output_height, fast_divmod div_output_width, fast_divmod div_output_image,
|
||||
const T* input_data, T* output_data, const size_t N,
|
||||
const T extrapolation_value,
|
||||
LinearMappingInfo* dims_mapping) {
|
||||
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);
|
||||
int bxc, output_image_index;
|
||||
div_output_image.divmod(id, bxc, output_image_index);
|
||||
CUDA_LONG input_index = bxc * input_depth * input_height * input_width;
|
||||
int output_z, output_y, output_x, temp;
|
||||
|
||||
div_output_height.divmod(output_image_index, output_z, temp);
|
||||
div_output_width.divmod(temp, output_y, output_x);
|
||||
|
||||
if (dims_mapping[output_z].extrapolate_ ||
|
||||
dims_mapping[output_y + output_depth].extrapolate_ ||
|
||||
dims_mapping[output_x + output_depth + output_height].extrapolate_) {
|
||||
output_data[id] = extrapolation_value;
|
||||
return;
|
||||
}
|
||||
|
||||
float z_offset_0 = dims_mapping[output_z].weight_;
|
||||
int z_int = dims_mapping[output_z].origin_;
|
||||
|
||||
float y_offset_0 = dims_mapping[output_y + output_depth].weight_;
|
||||
int y_int = dims_mapping[output_y + output_depth].origin_;
|
||||
|
||||
float x_offset_0 = dims_mapping[output_x + output_depth + output_height].weight_;
|
||||
int x_int = dims_mapping[output_x + output_depth + output_height].origin_;
|
||||
|
||||
input_index += z_int * input_height * input_width + y_int * input_width + x_int;
|
||||
|
||||
T x000 = input_data[input_index];
|
||||
|
||||
bool end_of_h = (y_int >= input_height - 1);
|
||||
bool end_of_w = (x_int >= input_width - 1);
|
||||
|
||||
T x100 = end_of_w ? x000 : input_data[input_index + 1];
|
||||
T x010 = end_of_h ? x000 : input_data[input_index + input_width];
|
||||
T x110 = end_of_w ? x010 : (end_of_h ? x100 : input_data[input_index + input_width + 1]);
|
||||
|
||||
bool end_of_d = (z_int >= input_depth - 1);
|
||||
if (!end_of_d) {
|
||||
input_index = input_index + input_height * input_width;
|
||||
}
|
||||
|
||||
T x001 = end_of_d ? x000 : input_data[input_index];
|
||||
|
||||
T x101 = end_of_w ? x001 : input_data[input_index + 1];
|
||||
T x011 = end_of_h ? x001 : input_data[input_index + input_width];
|
||||
T x111 = end_of_w ? x011 : (end_of_h ? x101 : input_data[input_index + input_width + 1]);
|
||||
|
||||
float z_offset_1 = 1.0f - z_offset_0;
|
||||
float y_offset_1 = 1.0f - y_offset_0;
|
||||
float x_offset_1 = 1.0f - x_offset_0;
|
||||
output_data[id] =
|
||||
x000 * static_cast<T>(z_offset_1 * y_offset_1 * x_offset_1) +
|
||||
x010 * static_cast<T>(z_offset_1 * y_offset_0 * x_offset_1) +
|
||||
x100 * static_cast<T>(z_offset_1 * y_offset_1 * x_offset_0) +
|
||||
x110 * static_cast<T>(z_offset_1 * y_offset_0 * x_offset_0) +
|
||||
|
||||
x001 * static_cast<T>(z_offset_0 * y_offset_1 * x_offset_1) +
|
||||
x011 * static_cast<T>(z_offset_0 * y_offset_0 * x_offset_1) +
|
||||
x101 * static_cast<T>(z_offset_0 * y_offset_1 * x_offset_0) +
|
||||
x111 * static_cast<T>(z_offset_0 * y_offset_0 * x_offset_0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ float CubicInterpolationRowwise(
|
||||
const T* image, int x, int y, int input_height, int input_width,
|
||||
|
|
@ -464,7 +582,7 @@ size_t CalcResizeBufferSize(const onnxruntime::UpsampleMode upsample_mode,
|
|||
case UpsampleMode::NN:
|
||||
return sizeof(int64_t) * output_dims.size() + sizeof(NearestMappingInfo) * std::accumulate(output_dims.begin(), output_dims.end(), 0);
|
||||
case UpsampleMode::LINEAR:
|
||||
return sizeof(BilinearMappingInfo) * std::accumulate(output_dims.rbegin(), output_dims.rbegin() + 2, 0);
|
||||
return sizeof(LinearMappingInfo) * std::accumulate(output_dims.rbegin(), output_dims.rbegin() + 2, 0);
|
||||
case UpsampleMode::CUBIC:
|
||||
return sizeof(CubicMappingInfo) * std::accumulate(output_dims.rbegin(), output_dims.rbegin() + 2, 0);
|
||||
}
|
||||
|
|
@ -479,7 +597,7 @@ void ResizeNearestImpl(
|
|||
TArray<int64_t>& input_strides,
|
||||
TArray<fast_divmod>& output_div_pitches,
|
||||
TArray<float>& scales_vals,
|
||||
TArray<float>& roi_vals,
|
||||
TArray<float, 10>& roi_vals,
|
||||
const T* input_data,
|
||||
T* output_data,
|
||||
const size_t N,
|
||||
|
|
@ -556,7 +674,7 @@ void ResizeImpl(
|
|||
TArray<int64_t>& input_strides,
|
||||
TArray<fast_divmod>& output_div_pitches,
|
||||
TArray<float>& scales_vals,
|
||||
TArray<float>& roi_vals,
|
||||
TArray<float, 10>& roi_vals,
|
||||
const T* input_data,
|
||||
T* output_data,
|
||||
const size_t N,
|
||||
|
|
@ -587,45 +705,90 @@ void ResizeImpl(
|
|||
return;
|
||||
}
|
||||
|
||||
int blocksPerGrid = (int)(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
|
||||
fast_divmod div_output_image = (rank > 2) ? output_div_pitches[rank - 3] : fast_divmod(gsl::narrow_cast<int>(N));
|
||||
// We support a special case of bilinear or bicubic if the input data is 4D with the outer 2 scales being 1.0
|
||||
// We would have validated the outer scale values by the time execution reaches this
|
||||
bool is_2D = (rank == 2 || rank == 4);
|
||||
|
||||
// We support a special case of trilinear or tricubic if the input data is 5D with the outer 2 scales being 1.0
|
||||
// We would have validated the outer scale values by the time execution reaches this
|
||||
bool is_3D = (rank == 3 || rank == 5);
|
||||
|
||||
// Should not hit this as we have already validated input rank/scales and we provide verbose error messages
|
||||
// to the user.
|
||||
ORT_ENFORCE(is_2D || is_3D, "Only bilinear/trilinear and bicubic modes are supported in Resize");
|
||||
|
||||
int blocksPerGrid = static_cast<int>(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
|
||||
fast_divmod div_output_image;
|
||||
if (is_2D) {
|
||||
div_output_image = (rank > 2) ? output_div_pitches[rank - 3] : fast_divmod(gsl::narrow_cast<int>(N));
|
||||
} else if (is_3D) {
|
||||
div_output_image = (rank > 3) ? output_div_pitches[rank - 4] : fast_divmod(gsl::narrow_cast<int>(N));
|
||||
}
|
||||
|
||||
int64_t output_depth = is_3D ? output_shape[rank - 3] : 0;
|
||||
int64_t output_height = output_shape[rank - 2];
|
||||
int64_t output_width = output_shape[rank - 1];
|
||||
int blocksPerDimsMappingGrid = (int)(ceil((output_height + output_width) / 32.0));
|
||||
int blocksPerDimsMappingGrid =
|
||||
static_cast<int>(ceil((output_depth + output_height + output_width) / 32.0));
|
||||
|
||||
switch (upsample_mode) {
|
||||
case UpsampleMode::LINEAR:
|
||||
_ResizeBilinearCoordinateMapping<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
scales_vals[rank - 2], scales_vals[rank - 1],
|
||||
roi_vals[rank - 2], roi_vals[rank - 2 + rank],
|
||||
roi_vals[rank - 1], roi_vals[rank - 1 + rank],
|
||||
output_height + output_width, extrapolation_enabled, transform_coordinate,
|
||||
reinterpret_cast<BilinearMappingInfo*>(dims_mapping));
|
||||
_ResizeBilinearKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
output_div_pitches[rank - 2], div_output_image,
|
||||
input_data, output_data, N, extrapolation_value,
|
||||
reinterpret_cast<BilinearMappingInfo*>(dims_mapping));
|
||||
return;
|
||||
if (is_2D) {
|
||||
_ResizeBilinearCoordinateMapping<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
scales_vals[rank - 2], scales_vals[rank - 1],
|
||||
roi_vals[rank - 2], roi_vals[rank - 2 + rank],
|
||||
roi_vals[rank - 1], roi_vals[rank - 1 + rank],
|
||||
output_height + output_width, extrapolation_enabled, transform_coordinate,
|
||||
reinterpret_cast<LinearMappingInfo*>(dims_mapping));
|
||||
_ResizeBilinearKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
output_div_pitches[rank - 2], div_output_image,
|
||||
input_data, output_data, N, extrapolation_value,
|
||||
reinterpret_cast<LinearMappingInfo*>(dims_mapping));
|
||||
return;
|
||||
} else if (is_3D) {
|
||||
_ResizeTrilinearCoordinateMapping<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 3] , input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_depth, output_height, output_width,
|
||||
scales_vals[rank - 3], scales_vals[rank - 2], scales_vals[rank - 1],
|
||||
roi_vals[rank - 3], roi_vals[rank - 3 + rank],
|
||||
roi_vals[rank - 2], roi_vals[rank - 2 + rank],
|
||||
roi_vals[rank - 1], roi_vals[rank - 1 + rank],
|
||||
output_depth + output_height + output_width, extrapolation_enabled, transform_coordinate,
|
||||
reinterpret_cast<LinearMappingInfo*>(dims_mapping));
|
||||
_ResizeTrilinearKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
|
||||
input_shape[rank - 3], input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_depth, output_height, output_width,
|
||||
output_div_pitches[rank - 3], output_div_pitches[rank - 2], div_output_image,
|
||||
input_data, output_data, N, extrapolation_value,
|
||||
reinterpret_cast<LinearMappingInfo*>(dims_mapping));
|
||||
return;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case UpsampleMode::CUBIC:
|
||||
_ResizeCubicCoordinateMapping<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
scales_vals[rank - 2], scales_vals[rank - 1],
|
||||
roi_vals[rank - 2], roi_vals[rank - 2 + rank],
|
||||
roi_vals[rank - 1], roi_vals[rank - 1 + rank],
|
||||
output_height + output_width, extrapolation_enabled,
|
||||
cubic_coeff_a, exclude_outside, transform_coordinate,
|
||||
reinterpret_cast<CubicMappingInfo*>(dims_mapping));
|
||||
_ResizeBiCubicKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
output_div_pitches[rank - 2], div_output_image,
|
||||
input_data, output_data, N, extrapolation_value,
|
||||
reinterpret_cast<CubicMappingInfo*>(dims_mapping));
|
||||
return;
|
||||
if (is_2D) {
|
||||
_ResizeCubicCoordinateMapping<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
scales_vals[rank - 2], scales_vals[rank - 1],
|
||||
roi_vals[rank - 2], roi_vals[rank - 2 + rank],
|
||||
roi_vals[rank - 1], roi_vals[rank - 1 + rank],
|
||||
output_height + output_width, extrapolation_enabled,
|
||||
cubic_coeff_a, exclude_outside, transform_coordinate,
|
||||
reinterpret_cast<CubicMappingInfo*>(dims_mapping));
|
||||
_ResizeBiCubicKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
output_height, output_width,
|
||||
output_div_pitches[rank - 2], div_output_image,
|
||||
input_data, output_data, N, extrapolation_value,
|
||||
reinterpret_cast<CubicMappingInfo*>(dims_mapping));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -638,7 +801,7 @@ void ResizeImpl(
|
|||
TArray<int64_t>& input_strides, \
|
||||
TArray<fast_divmod>& output_div_pitches, \
|
||||
TArray<float>& scales_vals, \
|
||||
TArray<float>& roi_vals, \
|
||||
TArray<float, 10>& roi_vals, \
|
||||
const T* input_data, \
|
||||
T* output_data, \
|
||||
const size_t N, \
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ void ResizeImpl(
|
|||
TArray<int64_t>& input_strides,
|
||||
TArray<fast_divmod>& output_div_pitches,
|
||||
TArray<float>& scales_vals,
|
||||
TArray<float>& roi,
|
||||
TArray<float, 10>& roi,
|
||||
const T* input_data,
|
||||
T* output_data,
|
||||
const size_t N,
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context,
|
|||
if (is_resize_) {
|
||||
TArray<int64_t> input_shape(X_dims);
|
||||
TArray<int64_t> output_shape(output_dims);
|
||||
TArray<float> roi_vals(roi);
|
||||
TArray<float, 10> roi_vals(roi);
|
||||
TArray<float> scales_vals(scales);
|
||||
|
||||
size_t temp_buffer_size = CalcResizeBufferSize(mode_, output_dims);
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_tf_crop_and_resize) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_tf_crop_and_resize) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{0.4f, 0.6f, 0.6f, 0.8f};
|
||||
std::vector<float> scales{};
|
||||
|
|
@ -65,7 +65,7 @@ TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_tf_crop_and_resize_with_extrapol
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{1.0f, 1.0f, 0.6f, 0.6f};
|
||||
|
|
@ -87,7 +87,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear_align_corners) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_align_corners) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{1.0f, 1.0f, 0.6f, 0.6f};
|
||||
|
|
@ -110,7 +110,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear_align_corners) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_pytorch_half_pixel) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_2DBilinear_pytorch_half_pixel) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{};
|
||||
|
|
@ -138,7 +138,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_pytorch_half_pixel)
|
|||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_asymmetric) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_asymmetric) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{1.0f, 1.0f, 2.0f, 4.0f};
|
||||
|
|
@ -172,7 +172,7 @@ TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_asymmetric) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_2DBilinear_align_corners) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_2DBilinear_align_corners) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{2.0f, 4.0f};
|
||||
|
|
@ -197,7 +197,66 @@ TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_2DBilinear_align_corners) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartScalesNoOpTest) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_3DTrilinear_pytorch_half_pixel) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{};
|
||||
std::vector<int64_t> sizes{1, 3, 1};
|
||||
|
||||
test.AddAttribute("mode", "linear");
|
||||
test.AddAttribute("coordinate_transformation_mode", "pytorch_half_pixel");
|
||||
|
||||
const int64_t D = 2, H = 4, W = 4;
|
||||
|
||||
std::vector<float> X = {
|
||||
1.0f, 2.0f, 3.0f, 4.0f,
|
||||
5.0f, 6.0f, 7.0f, 8.0f,
|
||||
9.0f, 10.0f, 11.0f, 12.0f,
|
||||
13.0f, 14.0f, 15.0f, 16.0f,
|
||||
|
||||
1.0f, 2.0f, 3.0f, 4.0f,
|
||||
5.0f, 6.0f, 7.0f, 8.0f,
|
||||
9.0f, 10.0f, 11.0f, 12.0f,
|
||||
13.0f, 14.0f, 15.0f, 16.0f};
|
||||
|
||||
test.AddInput<float>("X", {D, H, W}, X);
|
||||
test.AddInput<float>("roi", {0}, roi);
|
||||
test.AddInput<float>("scales", {0}, scales);
|
||||
test.AddInput<int64_t>("sizes", {3}, sizes);
|
||||
|
||||
std::vector<float> Y = {1.6666666f, 7.0f, 12.333333f};
|
||||
|
||||
test.AddOutput<float>("Y", {sizes[0], sizes[1], sizes[2]}, Y);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_5DTrilinear_pytorch_half_pixel) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{1.0f, 1.0f, 2.0f, 2.0f, 1.0f};
|
||||
|
||||
test.AddAttribute("mode", "linear");
|
||||
test.AddAttribute("coordinate_transformation_mode", "pytorch_half_pixel");
|
||||
|
||||
const int64_t N = 1, C = 2, D = 2, H = 1, W = 2;
|
||||
|
||||
std::vector<float> X = {
|
||||
1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f};
|
||||
|
||||
test.AddInput<float>("X", {N, C, D, H, W}, X);
|
||||
test.AddInput<float>("roi", {0}, roi);
|
||||
test.AddInput<float>("scales", {5}, scales);
|
||||
|
||||
std::vector<float> Y = {1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f,
|
||||
1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f,
|
||||
1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f,
|
||||
1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f};
|
||||
|
||||
test.AddOutput<float>("Y", {1, 2, 4, 2, 2}, Y);
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: results mismatch
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest) {
|
||||
OpTester test("Resize", 11);
|
||||
std::vector<float> roi{};
|
||||
std::vector<float> scales{1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
|
@ -856,7 +915,7 @@ TEST(ResizeOpTest, ResizeOpCubicUpSampleTest_tf_half_pixel_for_nn) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear_Ver10) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_4DBilinear_Ver10) {
|
||||
OpTester test("Resize", 10);
|
||||
std::vector<float> scales{1.0f, 1.0f, 0.6f, 0.6f};
|
||||
|
||||
|
|
@ -876,7 +935,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_4DBilinear_Ver10) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_Ver10) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearDownSampleTest_2DBilinear_Ver10) {
|
||||
OpTester test("Resize", 10);
|
||||
std::vector<float> scales{0.6f, 0.6f};
|
||||
|
||||
|
|
@ -896,7 +955,7 @@ TEST(ResizeOpTest, ResizeOpLineartDownSampleTest_2DBilinear_Ver10) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_Ver10) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_4DBilinear_Ver10) {
|
||||
OpTester test("Resize", 10);
|
||||
std::vector<float> scales{1.0f, 1.0f, 2.0f, 4.0f};
|
||||
test.AddAttribute("mode", "linear");
|
||||
|
|
@ -926,7 +985,7 @@ TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_4DBilinear_Ver10) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_2DBilinear_Ver10) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearUpSampleTest_2DBilinear_Ver10) {
|
||||
OpTester test("Resize", 10);
|
||||
std::vector<float> scales{2.0f, 4.0f};
|
||||
test.AddAttribute("mode", "linear");
|
||||
|
|
@ -948,7 +1007,7 @@ TEST(ResizeOpTest, ResizeOpLineartUpSampleTest_2DBilinear_Ver10) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
TEST(ResizeOpTest, ResizeOpLineartScalesNoOpTest_Ver10) {
|
||||
TEST(ResizeOpTest, ResizeOpLinearScalesNoOpTest_Ver10) {
|
||||
OpTester test("Resize", 10);
|
||||
std::vector<float> scales{1.0f, 1.0f, 1.0f, 1.0f};
|
||||
test.AddAttribute("mode", "linear");
|
||||
|
|
|
|||
Loading…
Reference in a new issue