mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
batch size 0 support in norm operators (#26894)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/26894 Add batch_size == 0 tests for the norm DNNLOWP operators. Test Plan: CI Reviewed By: jianyuh Differential Revision: D17595416 fbshipit-source-id: 23086ecf8818be30da031eb4fc2922daea79ea7c
This commit is contained in:
parent
f99bc714c7
commit
ec1f0f08f1
5 changed files with 45 additions and 21 deletions
|
|
@ -41,13 +41,18 @@ class GroupNormOp final : public Operator<Context> {
|
|||
const int ndim = X.dim();
|
||||
const int N = X.dim32(0);
|
||||
const int C = order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1);
|
||||
const size_t HxW = X.numel() / (N * C);
|
||||
const size_t HxW = order_ == StorageOrder::NCHW
|
||||
? X.size_from_dim(2)
|
||||
: X.size_between_dim(0, ndim - 1);
|
||||
CAFFE_ENFORCE_EQ(C % group_, 0);
|
||||
CAFFE_ENFORCE_EQ(gamma.numel(), C);
|
||||
CAFFE_ENFORCE_EQ(beta.numel(), C);
|
||||
const int G = group_;
|
||||
const int K = C / G;
|
||||
auto* Y = Output(OUTPUT, X.sizes(), at::dtype<T>());
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
T* mu_data = nullptr;
|
||||
T* rsig_data = nullptr;
|
||||
if (OutputSize() == 3) {
|
||||
|
|
|
|||
|
|
@ -122,9 +122,14 @@ void GroupNormDNNLowPOp<T>::QuantizeBeta() {
|
|||
const auto& beta_int8 = this->template Input<int8::Int8TensorCPU>(BETA);
|
||||
beta_qparams.scale = beta_int8.scale;
|
||||
beta_qparams.zero_point = beta_int8.zero_point;
|
||||
CAFFE_ENFORCE_LE(
|
||||
std::abs(beta_qparams.scale - X_qparams.scale * gamma_qparams.scale),
|
||||
1e-4);
|
||||
const auto& X = InputTensorCPU_(INPUT);
|
||||
const int N = X.dim32(0);
|
||||
if (N > 0) {
|
||||
CAFFE_ENFORCE_LE(
|
||||
std::abs(
|
||||
beta_qparams.scale - X_qparams.scale * gamma_qparams.scale),
|
||||
1e-4);
|
||||
}
|
||||
CAFFE_ENFORCE_EQ(beta_qparams.zero_point, 0);
|
||||
beta_quantized_data_ = beta.template data<int32_t>();
|
||||
if (dequantize_output_) {
|
||||
|
|
@ -300,7 +305,7 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
|
|||
const auto& X = InputTensorCPU_(INPUT);
|
||||
const int N = X.dim32(0);
|
||||
const int C = X.dim32(1);
|
||||
const int HxW = X.size() / (N * C);
|
||||
const int HxW = X.size_from_dim(2);
|
||||
const int G = group_;
|
||||
CAFFE_ENFORCE_EQ(C % G, 0);
|
||||
const int K = C / G;
|
||||
|
|
@ -312,6 +317,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
|
|||
|
||||
if (dequantize_output_) {
|
||||
float* Y_data = Y->template mutable_data<float>();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
mu_dequantized_.resize(N * G);
|
||||
rsig_dequantized_.resize(N * G);
|
||||
float* mu_data = mu_dequantized_.data();
|
||||
|
|
@ -335,6 +343,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
|
|||
N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data);
|
||||
} else {
|
||||
T* Y_data = GetQuantizedOutputData_();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
mu_quantized_.resize(N * G);
|
||||
rsig_quantized_.resize(N * G);
|
||||
int32_t* mu_data = mu_quantized_.data();
|
||||
|
|
@ -368,7 +379,7 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
|
|||
const int ndim = X.dim();
|
||||
const int N = X.dim32(0);
|
||||
const int C = X.dim32(ndim - 1);
|
||||
const int HxW = X.size() / (N * C);
|
||||
const int HxW = X.size_between_dim(0, ndim - 1);
|
||||
const int G = group_;
|
||||
CAFFE_ENFORCE_EQ(C % G, 0);
|
||||
const int K = C / G;
|
||||
|
|
@ -380,6 +391,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
|
|||
|
||||
if (dequantize_output_) {
|
||||
float* Y_data = Y->template mutable_data<float>();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
mu_dequantized_.resize(N * G);
|
||||
rsig_dequantized_.resize(N * G);
|
||||
float* mu_data = mu_dequantized_.data();
|
||||
|
|
@ -403,6 +417,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
|
|||
N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data);
|
||||
} else {
|
||||
T* Y_data = GetQuantizedOutputData_();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
mu_quantized_.resize(N * G);
|
||||
rsig_quantized_.resize(N * G);
|
||||
int32_t* mu_data = mu_quantized_.data();
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])
|
|||
|
||||
class DNNLowPOpGroupNormTest(hu.HypothesisTestCase):
|
||||
@given(
|
||||
N=st.integers(1, 4),
|
||||
N=st.integers(0, 4),
|
||||
G=st.integers(2, 4),
|
||||
K=st.integers(2, 12),
|
||||
H=st.integers(4, 16),
|
||||
|
|
@ -80,7 +80,9 @@ class DNNLowPOpGroupNormTest(hu.HypothesisTestCase):
|
|||
)
|
||||
net.Proto().op.extend([int8_given_tensor_fill])
|
||||
|
||||
X_q_param = dnnlowp_utils.choose_quantization_params(X.min(), X.max())
|
||||
X_min = 0 if X.size == 0 else X.min()
|
||||
X_max = 0 if X.size == 0 else X.max()
|
||||
X_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)
|
||||
int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
|
||||
beta, "beta_q", X_q_param, gamma_q_param
|
||||
)
|
||||
|
|
|
|||
|
|
@ -70,10 +70,7 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
|
|||
const int N = X.dim32(0);
|
||||
const int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1));
|
||||
const std::vector<int> X_dims(X.sizes().cbegin(), X.sizes().cend());
|
||||
const int HxW =
|
||||
std::accumulate(
|
||||
X_dims.cbegin() + 1, X_dims.cend(), 1, std::multiplies<int>()) /
|
||||
C;
|
||||
const int HxW = X.size_from_dim(1) / C;
|
||||
CAFFE_ENFORCE_EQ(scale.numel(), C);
|
||||
CAFFE_ENFORCE_EQ(bias.numel(), C);
|
||||
|
||||
|
|
@ -89,13 +86,18 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
|
|||
&beta_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
|
||||
float* alpha_data = alpha_.template mutable_data<float>();
|
||||
float* beta_data = beta_.template mutable_data<float>();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
const auto& mean = Input(EST_MEAN);
|
||||
const auto& var = Input(EST_VAR);
|
||||
CAFFE_ENFORCE_EQ(mean.numel(), C);
|
||||
CAFFE_ENFORCE_EQ(var.numel(), C);
|
||||
|
||||
auto* Y = OutputTensorCPU_(OUTPUT);
|
||||
Y->Resize(X.sizes());
|
||||
T* Y_data = GetQuantizedOutputData_();
|
||||
if (N == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
ComputeFusedParam_(
|
||||
C,
|
||||
scale_data,
|
||||
|
|
@ -108,9 +110,6 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
|
|||
vector<T> X_temp;
|
||||
const T* X_data =
|
||||
dnnlowp::QuantizeInputIfNeeded(this, 0, in_qparams_[0], X_temp);
|
||||
auto* Y = OutputTensorCPU_(OUTPUT);
|
||||
Y->Resize(X.sizes());
|
||||
T* Y_data = GetQuantizedOutputData_();
|
||||
|
||||
if (order_ == StorageOrder::NCHW) {
|
||||
for (int c = 0; c < C; ++c) {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase):
|
|||
size=st.integers(10, 16),
|
||||
input_channels=st.integers(2, 16),
|
||||
output_channels=st.integers(2, 16),
|
||||
batch_size=st.integers(1, 3),
|
||||
batch_size=st.integers(0, 3),
|
||||
order=st.sampled_from(["NCHW", "NHWC"]),
|
||||
in_quantized=st.booleans(),
|
||||
out_quantized=st.booleans(),
|
||||
|
|
@ -46,8 +46,9 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase):
|
|||
X = np.round(np.random.rand(batch_size, size, size, input_channels)).astype(
|
||||
np.float32
|
||||
)
|
||||
X[0, 0, 0, 0] = X_min
|
||||
X[0, 0, 0, 1] = X_max
|
||||
if batch_size != 0:
|
||||
X[0, 0, 0, 0] = X_min
|
||||
X[0, 0, 0, 1] = X_max
|
||||
|
||||
epsilon = np.abs(np.random.rand())
|
||||
scale = np.random.rand(input_channels).astype(np.float32)
|
||||
|
|
|
|||
Loading…
Reference in a new issue