diff --git a/caffe2/operators/group_norm_op.h b/caffe2/operators/group_norm_op.h
index d31658ab9f7..71cc42bfeef 100644
--- a/caffe2/operators/group_norm_op.h
+++ b/caffe2/operators/group_norm_op.h
@@ -41,13 +41,18 @@ class GroupNormOp final : public Operator<Context> {
     const int ndim = X.dim();
     const int N = X.dim32(0);
     const int C = order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1);
-    const size_t HxW = X.numel() / (N * C);
+    const size_t HxW = order_ == StorageOrder::NCHW
+        ? X.size_from_dim(2)
+        : X.size_between_dim(0, ndim - 1);
     CAFFE_ENFORCE_EQ(C % group_, 0);
     CAFFE_ENFORCE_EQ(gamma.numel(), C);
     CAFFE_ENFORCE_EQ(beta.numel(), C);
     const int G = group_;
     const int K = C / G;
     auto* Y = Output(OUTPUT, X.sizes(), at::dtype<T>());
+    if (N == 0) {
+      return true;
+    }
     T* mu_data = nullptr;
     T* rsig_data = nullptr;
     if (OutputSize() == 3) {
diff --git a/caffe2/quantization/server/group_norm_dnnlowp_op.cc b/caffe2/quantization/server/group_norm_dnnlowp_op.cc
index 50d01b9ec29..f2cab63d64e 100644
--- a/caffe2/quantization/server/group_norm_dnnlowp_op.cc
+++ b/caffe2/quantization/server/group_norm_dnnlowp_op.cc
@@ -122,9 +122,14 @@ void GroupNormDNNLowPOp<T>::QuantizeBeta() {
       const auto& beta_int8 = this->template Input<int8::Int8TensorCPU>(BETA);
       beta_qparams.scale = beta_int8.scale;
       beta_qparams.zero_point = beta_int8.zero_point;
-      CAFFE_ENFORCE_LE(
-          std::abs(beta_qparams.scale - X_qparams.scale * gamma_qparams.scale),
-          1e-4);
+      const auto& X = InputTensorCPU_(INPUT);
+      const int N = X.dim32(0);
+      if (N > 0) {
+        CAFFE_ENFORCE_LE(
+            std::abs(
+                beta_qparams.scale - X_qparams.scale * gamma_qparams.scale),
+            1e-4);
+      }
       CAFFE_ENFORCE_EQ(beta_qparams.zero_point, 0);
       beta_quantized_data_ = beta.template data<int32_t>();
       if (dequantize_output_) {
@@ -300,7 +305,7 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
   const auto& X = InputTensorCPU_(INPUT);
   const int N = X.dim32(0);
   const int C = X.dim32(1);
-  const int HxW = X.size() / (N * C);
+  const int HxW = X.size_from_dim(2);
   const int G = group_;
   CAFFE_ENFORCE_EQ(C % G, 0);
   const int K = C / G;
@@ -312,6 +317,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
 
   if (dequantize_output_) {
     float* Y_data = Y->template mutable_data<float>();
+    if (N == 0) {
+      return true;
+    }
     mu_dequantized_.resize(N * G);
     rsig_dequantized_.resize(N * G);
     float* mu_data = mu_dequantized_.data();
@@ -335,6 +343,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNCHW() {
         N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data);
   } else {
     T* Y_data = GetQuantizedOutputData_();
+    if (N == 0) {
+      return true;
+    }
     mu_quantized_.resize(N * G);
     rsig_quantized_.resize(N * G);
     int32_t* mu_data = mu_quantized_.data();
@@ -368,7 +379,7 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
   const int ndim = X.dim();
   const int N = X.dim32(0);
   const int C = X.dim32(ndim - 1);
-  const int HxW = X.size() / (N * C);
+  const int HxW = X.size_between_dim(0, ndim - 1);
   const int G = group_;
   CAFFE_ENFORCE_EQ(C % G, 0);
   const int K = C / G;
@@ -380,6 +391,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
 
   if (dequantize_output_) {
     float* Y_data = Y->template mutable_data<float>();
+    if (N == 0) {
+      return true;
+    }
     mu_dequantized_.resize(N * G);
     rsig_dequantized_.resize(N * G);
     float* mu_data = mu_dequantized_.data();
@@ -403,6 +417,9 @@ bool GroupNormDNNLowPOp<T>::RunOnDeviceWithOrderNHWC() {
         N, C, HxW, X_dequantized_.data(), scale_data, bias_data, Y_data);
   } else {
     T* Y_data = GetQuantizedOutputData_();
+    if (N == 0) {
+      return true;
+    }
     mu_quantized_.resize(N * G);
     rsig_quantized_.resize(N * G);
     int32_t* mu_data = mu_quantized_.data();
diff --git a/caffe2/quantization/server/group_norm_dnnlowp_op_test.py b/caffe2/quantization/server/group_norm_dnnlowp_op_test.py
index b6acc900437..973576bc6ed 100644
--- a/caffe2/quantization/server/group_norm_dnnlowp_op_test.py
+++ b/caffe2/quantization/server/group_norm_dnnlowp_op_test.py
@@ -17,7 +17,7 @@ workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])
 
 class DNNLowPOpGroupNormTest(hu.HypothesisTestCase):
     @given(
-        N=st.integers(1, 4),
+        N=st.integers(0, 4),
         G=st.integers(2, 4),
         K=st.integers(2, 12),
         H=st.integers(4, 16),
@@ -80,7 +80,9 @@ class DNNLowPOpGroupNormTest(hu.HypothesisTestCase):
                 )
                 net.Proto().op.extend([int8_given_tensor_fill])
 
-                X_q_param = dnnlowp_utils.choose_quantization_params(X.min(), X.max())
+                X_min = 0 if X.size == 0 else X.min()
+                X_max = 0 if X.size == 0 else X.max()
+                X_q_param = dnnlowp_utils.choose_quantization_params(X_min, X_max)
                 int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                     beta, "beta_q", X_q_param, gamma_q_param
                 )
diff --git a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc
index 612447de7da..b1f1b2fc2a6 100644
--- a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc
+++ b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc
@@ -70,10 +70,7 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
   const int N = X.dim32(0);
   const int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1));
   const std::vector<int> X_dims(X.sizes().cbegin(), X.sizes().cend());
-  const int HxW =
-      std::accumulate(
-          X_dims.cbegin() + 1, X_dims.cend(), 1, std::multiplies<int>()) /
-      C;
+  const int HxW = X.size_from_dim(1) / C;
   CAFFE_ENFORCE_EQ(scale.numel(), C);
   CAFFE_ENFORCE_EQ(bias.numel(), C);
 
@@ -89,13 +86,18 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
       &beta_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
   float* alpha_data = alpha_.template mutable_data<float>();
   float* beta_data = beta_.template mutable_data<float>();
-  if (N == 0) {
-    return true;
-  }
   const auto& mean = Input(EST_MEAN);
   const auto& var = Input(EST_VAR);
   CAFFE_ENFORCE_EQ(mean.numel(), C);
   CAFFE_ENFORCE_EQ(var.numel(), C);
+
+  auto* Y = OutputTensorCPU_(OUTPUT);
+  Y->Resize(X.sizes());
+  T* Y_data = GetQuantizedOutputData_();
+  if (N == 0) {
+    return true;
+  }
+
   ComputeFusedParam_(
       C,
       scale_data,
@@ -108,9 +110,6 @@ bool SpatialBNDNNLowPOp<T, ReluFused>::RunOnDevice() {
   vector<T> X_temp;
   const T* X_data =
       dnnlowp::QuantizeInputIfNeeded(this, 0, in_qparams_[0], X_temp);
-  auto* Y = OutputTensorCPU_(OUTPUT);
-  Y->Resize(X.sizes());
-  T* Y_data = GetQuantizedOutputData_();
 
   if (order_ == StorageOrder::NCHW) {
     for (int c = 0; c < C; ++c) {
diff --git a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py
index 0d8468f0eb7..71dc54e0bca 100644
--- a/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py
+++ b/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_test.py
@@ -21,7 +21,7 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase):
         size=st.integers(10, 16),
         input_channels=st.integers(2, 16),
         output_channels=st.integers(2, 16),
-        batch_size=st.integers(1, 3),
+        batch_size=st.integers(0, 3),
         order=st.sampled_from(["NCHW", "NHWC"]),
         in_quantized=st.booleans(),
         out_quantized=st.booleans(),
@@ -46,8 +46,9 @@ class DNNLowPOpSpatialBNTest(hu.HypothesisTestCase):
         X = np.round(np.random.rand(batch_size, size, size, input_channels)).astype(
             np.float32
         )
-        X[0, 0, 0, 0] = X_min
-        X[0, 0, 0, 1] = X_max
+        if batch_size != 0:
+            X[0, 0, 0, 0] = X_min
+            X[0, 0, 0, 1] = X_max
 
         epsilon = np.abs(np.random.rand())
         scale = np.random.rand(input_channels).astype(np.float32)