diff --git a/caffe2/operators/group_norm_op.h b/caffe2/operators/group_norm_op.h index d31658ab9f7..71cc42bfeef 100644 --- a/caffe2/operators/group_norm_op.h +++ b/caffe2/operators/group_norm_op.h @@ -41,13 +41,18 @@ class GroupNormOp final : public Operator { const int ndim = X.dim(); const int N = X.dim32(0); const int C = order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1); - const size_t HxW = X.numel() / (N * C); + const size_t HxW = order_ == StorageOrder::NCHW + ? X.size_from_dim(2) + : X.size_between_dim(0, ndim - 1); CAFFE_ENFORCE_EQ(C % group_, 0); CAFFE_ENFORCE_EQ(gamma.numel(), C); CAFFE_ENFORCE_EQ(beta.numel(), C); const int G = group_; const int K = C / G; auto* Y = Output(OUTPUT, X.sizes(), at::dtype()); + if (N == 0) { + return true; + } T* mu_data = nullptr; T* rsig_data = nullptr; if (OutputSize() == 3) { diff --git a/caffe2/quantization/server/group_norm_dnnlowp_op.cc b/caffe2/quantization/server/group_norm_dnnlowp_op.cc index 50d01b9ec29..f2cab63d64e 100644 --- a/caffe2/quantization/server/group_norm_dnnlowp_op.cc +++ b/caffe2/quantization/server/group_norm_dnnlowp_op.cc @@ -122,9 +122,14 @@ void GroupNormDNNLowPOp::QuantizeBeta() { const auto& beta_int8 = this->template Input(BETA); beta_qparams.scale = beta_int8.scale; beta_qparams.zero_point = beta_int8.zero_point; - CAFFE_ENFORCE_LE( - std::abs(beta_qparams.scale - X_qparams.scale * gamma_qparams.scale), - 1e-4); + const auto& X = InputTensorCPU_(INPUT); + const int N = X.dim32(0); + if (N > 0) { + CAFFE_ENFORCE_LE( + std::abs( + beta_qparams.scale - X_qparams.scale * gamma_qparams.scale), + 1e-4); + } CAFFE_ENFORCE_EQ(beta_qparams.zero_point, 0); beta_quantized_data_ = beta.template data(); if (dequantize_output_) { @@ -300,7 +305,7 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNCHW() { const auto& X = InputTensorCPU_(INPUT); const int N = X.dim32(0); const int C = X.dim32(1); - const int HxW = X.size() / (N * C); + const int HxW = X.size_from_dim(2); const int G = group_; CAFFE_ENFORCE_EQ(C % G, 0); const int K = C / G; @@ -312,6 +317,9 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNCHW() { if (dequantize_output_) { float* Y_data = Y->template mutable_data(); + if (N == 0) { + return true; + } mu_dequantized_.resize(N * G); rsig_dequantized_.resize(N * G); float* mu_data = mu_dequantized_.data(); @@ -335,6 +343,9 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNCHW() { N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data); } else { T* Y_data = GetQuantizedOutputData_(); + if (N == 0) { + return true; + } mu_quantized_.resize(N * G); rsig_quantized_.resize(N * G); int32_t* mu_data = mu_quantized_.data(); @@ -368,7 +379,7 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNHWC() { const int ndim = X.dim(); const int N = X.dim32(0); const int C = X.dim32(ndim - 1); - const int HxW = X.size() / (N * C); + const int HxW = X.size_between_dim(0, ndim - 1); const int G = group_; CAFFE_ENFORCE_EQ(C % G, 0); const int K = C / G; @@ -380,6 +391,9 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNHWC() { if (dequantize_output_) { float* Y_data = Y->template mutable_data(); + if (N == 0) { + return true; + } mu_dequantized_.resize(N * G); rsig_dequantized_.resize(N * G); float* mu_data = mu_dequantized_.data(); @@ -403,6 +417,9 @@ bool GroupNormDNNLowPOp::RunOnDeviceWithOrderNHWC() { N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data); } else { T* Y_data = GetQuantizedOutputData_(); + if (N == 0) { + return true; + } mu_quantized_.resize(N * G); rsig_quantized_.resize(N * G); int32_t* mu_data = mu_quantized_.data(); diff --git a/caffe2/quantization/server/group_norm_dnnlowp_op_test.py b/caffe2/quantization/server/group_norm_dnnlowp_op_test.py index b6acc900437..973576bc6ed 100644 --- a/caffe2/quantization/server/group_norm_dnnlowp_op_test.py +++ b/caffe2/quantization/server/group_norm_dnnlowp_op_test.py @@ -17,7 +17,7 @@ workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"]) class DNNLowPOpGroupNormTest(hu.HypothesisTestCase): @given( - N=st.integers(1, 4), + N=st.integers(0, 4), G=st.integers(2, 4), K=st.integers(2, 12), H=st.integers(4, 16), @@ -80,7 +80,9 @@ class DNNLowPOpGroupNormTest(hu.HypothesisTestCase): ) net.Proto().op.extend([int8_given_tensor_fill]) - X_q_param = dnnlowp_utils.choose_quantization_params(X.min(), X.max()) + X_min = 0 if X.size == 0 else X.min() + X_max = 0 if X.size == 0 else X.max() + X_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max) int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill( beta, "beta_q", X_q_param, gamma_q_param ) diff --git a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc index 612447de7da..b1f1b2fc2a6 100644 --- a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc +++ b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc @@ -70,10 +70,7 @@ bool SpatialBNDNNLowPOp::RunOnDevice() { const int N = X.dim32(0); const int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1)); const std::vector X_dims(X.sizes().cbegin(), X.sizes().cend()); - const int HxW = - std::accumulate( - X_dims.cbegin() + 1, X_dims.cend(), 1, std::multiplies()) / - C; + const int HxW = X.size_from_dim(1) / C; CAFFE_ENFORCE_EQ(scale.numel(), C); CAFFE_ENFORCE_EQ(bias.numel(), C); @@ -89,13 +86,18 @@ bool SpatialBNDNNLowPOp::RunOnDevice() { &beta_, {C}, at::dtype().device(CPUContext::GetDeviceType())); float* alpha_data = alpha_.template mutable_data(); float* beta_data = beta_.template mutable_data(); - if (N == 0) { - return true; - } const auto& mean = Input(EST_MEAN); const auto& var = Input(EST_VAR); CAFFE_ENFORCE_EQ(mean.numel(), C); CAFFE_ENFORCE_EQ(var.numel(), C); + + auto* Y = OutputTensorCPU_(OUTPUT); + Y->Resize(X.sizes()); + T* Y_data = GetQuantizedOutputData_(); + if (N == 0) { + return true; + } + ComputeFusedParam_( C, scale_data, @@ -108,9 +110,6 @@ bool SpatialBNDNNLowPOp::RunOnDevice() { vector X_temp; const T* X_data = dnnlowp::QuantizeInputIfNeeded(this, 0, in_qparams_[0], X_temp); - auto* Y = OutputTensorCPU_(OUTPUT); - Y->Resize(X.sizes()); - T* Y_data = GetQuantizedOutputData_(); if (order_ == StorageOrder::NCHW) { for (int c = 0; c < C; ++c) { diff --git a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py index 0d8468f0eb7..71dc54e0bca 100644 --- a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py +++ b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py @@ -21,7 +21,7 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase): size=st.integers(10, 16), input_channels=st.integers(2, 16), output_channels=st.integers(2, 16), - batch_size=st.integers(1, 3), + batch_size=st.integers(0, 3), order=st.sampled_from(["NCHW", "NHWC"]), in_quantized=st.booleans(), out_quantized=st.booleans(), @@ -46,8 +46,9 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase): X = np.round(np.random.rand(batch_size, size, size, input_channels)).astype( np.float32 ) - X[0, 0, 0, 0] = X_min - X[0, 0, 0, 1] = X_max + if batch_size != 0: + X[0, 0, 0, 0] = X_min + X[0, 0, 0, 1] = X_max epsilon = np.abs(np.random.rand()) scale = np.random.rand(input_channels).astype(np.float32)