From e2f12885140c36c1d5bf82de6eb47797856fdacd Mon Sep 17 00:00:00 2001
From: Summer Deng <summerdeng@fb.com>
Date: Sat, 8 Feb 2020 18:14:25 -0800
Subject: [PATCH] Add utils to inspect fp16/int8 packed weights (#32979)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/32979

Since we use prepacked weights in the Fp16 FCs and future Int8 FCs in production Ads models, we provide Python utils to inspect the unpacked format of the weights for debugging purposes. The main interfaces are the following:

```
from deeplearning.numeric_suite.toolkit import packed_weights_inspector
# inspect fp16 packed weights
unpacked_fp16_weights = packed_weights_inspector.extract_fp16_fc_packed_weights(fp16_weight_blob_name)

# inspect int8 packed weights
unpacked_int8_weights, qparams = packed_weights_inspector.extract_int8_fc_packed_weights(int8_weight_blob_name)
```

Test Plan:
```
buck test mode/opt deeplearning/numeric_suite/toolkit/test:packed_weights_inspector_test
```

Reviewed By: amylittleyang

Differential Revision: D19724474

fbshipit-source-id: e937672b3722e61bc44c2587aab2288a86aece9a
---
 caffe2/quantization/server/pybind.cc | 97 +++++++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 1 deletion(-)

diff --git a/caffe2/quantization/server/pybind.cc b/caffe2/quantization/server/pybind.cc
index c012f0f3961..bdf828f7745 100644
--- a/caffe2/quantization/server/pybind.cc
+++ b/caffe2/quantization/server/pybind.cc
@@ -1,9 +1,12 @@
+#include <fbgemm/FbgemmFP16.h>
+#include <fbgemm/Utils.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include "activation_distribution_observer.h"
+#include "caffe2/opt/custom/fakefp16_transform.h"
+#include "caffe2/quantization/server/fbgemm_pack_blob.h"
 #include "caffe2_dnnlowp_utils.h"
 #include "quantization_error_minimization.h"
-#include "caffe2/opt/custom/fakefp16_transform.h"
 
 namespace caffe2 {
 namespace python {
@@ -278,4 +281,96 @@ PYBIND11_MODULE(dnnlowp_pybind11, m) {
       pybind11::arg("quant_scheme") = "min_max",
       pybind11::arg("p99_threshold") = 0.99,
       pybind11::arg("is_weight") = false);
+  // Dumps the fp16 FC weights in blob `blob_name` (current workspace) to the
+  // text file `weights_out_file`: one line per matrix row, each value passed
+  // through fbgemm::cpu_half2float and separated by single spaces. Workspace
+  // and blob are CAFFE_ENFORCE-checked; an unopenable file only logs a warning.
+  m.def(
+      "ObserveFp16FCPackedWeights",
+      [](const string& blob_name, const string& weights_out_file) {
+        Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
+        CAFFE_ENFORCE(gWorkspace);
+        const auto* blob = gWorkspace->GetBlob(blob_name);
+        CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
+        fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
+            blob->template Get<unique_ptr<fbgemm::PackedGemmMatrixFP16>>()
+                .get();
+        CAFFE_ENFORCE(packedGemmMatrixPtr, "Empty blob ", blob_name);
+        const uint64_t nrow = packedGemmMatrixPtr->numRows();
+        const uint64_t ncol = packedGemmMatrixPtr->numCols();
+        // Stays empty unless the matrix has to be unpacked below.
+        vector<fbgemm::float16> unpacked_mat;
+        const fbgemm::float16* unpacked_mat_ptr = packedGemmMatrixPtr->pmat();
+        if (packedGemmMatrixPtr->packed()) {
+          unpacked_mat.resize(nrow * ncol);
+          packedGemmMatrixPtr->unpack(
+              unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
+          unpacked_mat_ptr = unpacked_mat.data();
+        }
+        ofstream fout(weights_out_file);
+        if (!fout) {
+          LOG(WARNING) << "Can't open output file to dump fp16 weights "
+                       << weights_out_file;
+          return;
+        }
+        // Element (i, j) is read at i + nrow * j; '\n' avoids a flush per row.
+        for (uint64_t i = 0; i < nrow; ++i) {
+          for (uint64_t j = 0; j < ncol; ++j) {
+            fout << (j > 0 ? " " : "")
+                 << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]);
+          }
+          fout << '\n';
+        }
+        LOG(INFO) << "Written unpacked blob " << blob_name << " to "
+                  << weights_out_file;
+      },
+      pybind11::arg("blob_name"),
+      pybind11::arg("weights_out_file"));
+  // Dumps the int8 FC weight blob `blob_name` (current workspace) to the text
+  // file `weights_out_file`. The first line holds "scale zero_point" for every
+  // qparams entry; each following line holds one row of original_tensor. A
+  // missing blob or an unopenable file only logs a warning and returns, and a
+  // tensor with fewer than two dimensions yields no weight rows.
+  m.def(
+      "ObserveInt8FCPackedWeights",
+      [](const string& blob_name, const string& weights_out_file) {
+        Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
+        CAFFE_ENFORCE(gWorkspace);
+        const auto* blob = gWorkspace->GetBlob(blob_name);
+        if (blob == nullptr) {
+          LOG(WARNING) << "Can't find blob " << blob_name;
+          return;
+        }
+        const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
+            blob->template Get<Int8FCDNNLowPPackedWeightBlob>();
+        const auto& qparams = packedInt8Blob.qparams;
+        const auto& int8_tensor = packedInt8Blob.original_tensor;
+        const auto shape = int8_tensor.sizes();
+        // Only index shape[0] / shape[1] when both dimensions actually exist.
+        const int64_t nrow = shape.size() >= 2 ? shape[0] : 0;
+        const int64_t ncol = shape.size() >= 2 ? shape[1] : 0;
+        ofstream fout(weights_out_file);
+        if (!fout) {
+          LOG(WARNING) << "Can't open output file to dump int8 weights "
+                       << weights_out_file;
+          return;
+        }
+        for (size_t i = 0; i < qparams.size(); ++i) {
+          fout << (i > 0 ? " " : "") << to_string(qparams[i].scale) << " "
+               << to_string(qparams[i].zero_point);
+        }
+        fout << '\n'; // '\n' instead of endl: no flush after every line
+        const int8_t* int8_data = int8_tensor.data<int8_t>();
+        for (int64_t i = 0; i < nrow; ++i) {
+          for (int64_t j = 0; j < ncol; ++j) {
+            // Element (i, j) is read at offset i * ncol + j.
+            fout << (j > 0 ? " " : "") << to_string(int8_data[i * ncol + j]);
+          }
+          fout << '\n';
+        }
+        LOG(INFO) << "Written int8 qparams and weights for " << blob_name
+                  << " to " << weights_out_file;
+      },
+      pybind11::arg("blob_name"),
+      pybind11::arg("weights_out_file"));
 }