Add utils to inspect fp16/int8 packed weights (#32979)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/32979

Since we use prepacked weights in the fp16 FCs and future int8 FCs in production Ads models, we provide Python utils to inspect the unpacked format of the weights for debugging purposes. The main interfaces are the following:

```
from deeplearning.numeric_suite.toolkit import packed_weights_inspector
# inspect fp16 packed weights
unpacked_fp16_weights = packed_weights_inspector.extract_fp16_fc_packed_weights(fp16_weight_blob_name)

# inspect int8 packed weights
unpacked_int8_weights, qparams = packed_weights_inspector.extract_int8_fc_packed_weights(int8_weight_blob_name)
```

Test Plan:
```
buck test mode/opt deeplearning/numeric_suite/toolkit/test:packed_weights_inspector_test
```

Reviewed By: amylittleyang

Differential Revision: D19724474

fbshipit-source-id: e937672b3722e61bc44c2587aab2288a86aece9a
This commit is contained in:
Summer Deng 2020-02-08 18:14:25 -08:00 committed by Facebook Github Bot
parent 6249d7302b
commit e2f1288514

View file

@ -1,9 +1,12 @@
#include <fbgemm/FbgemmFP16.h>
#include <fbgemm/Utils.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "activation_distribution_observer.h"
#include "caffe2/opt/custom/fakefp16_transform.h"
#include "caffe2/quantization/server/fbgemm_pack_blob.h"
#include "caffe2_dnnlowp_utils.h"
#include "quantization_error_minimization.h"
#include "caffe2/opt/custom/fakefp16_transform.h"
namespace caffe2 {
namespace python {
@ -278,4 +281,96 @@ PYBIND11_MODULE(dnnlowp_pybind11, m) {
pybind11::arg("quant_scheme") = "min_max",
pybind11::arg("p99_threshold") = 0.99,
pybind11::arg("is_weight") = false);
m.def(
    "ObserveFp16FCPackedWeights",
    // Dumps the unpacked fp16 weight matrix stored in a packed FC weight
    // blob to a text file (one row per line, space-separated floats) so the
    // packed representation can be inspected offline.
    //
    // blob_name:        workspace name of the blob holding a
    //                   unique_ptr<fbgemm::PackedGemmMatrixFP16>.
    // weights_out_file: path of the text file to write; on open failure a
    //                   warning is logged and nothing is written.
    [](const string& blob_name, const string& weights_out_file) {
      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(gWorkspace);
      const auto* blob = gWorkspace->GetBlob(blob_name);
      CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
      fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
          blob->template Get<unique_ptr<fbgemm::PackedGemmMatrixFP16>>()
              .get();
      // The blob may hold an empty unique_ptr; fail loudly instead of
      // dereferencing null below.
      CAFFE_ENFORCE(
          packedGemmMatrixPtr, "Null packed fp16 matrix in blob ", blob_name);
      const uint64_t nrow = packedGemmMatrixPtr->numRows();
      const uint64_t ncol = packedGemmMatrixPtr->numCols();
      const uint64_t size = nrow * ncol;
      fbgemm::float16* unpacked_mat_ptr = nullptr;
      vector<fbgemm::float16> unpacked_mat;
      if (!packedGemmMatrixPtr->packed()) {
        // Matrix is already in unpacked layout; read its storage directly.
        unpacked_mat_ptr = packedGemmMatrixPtr->pmat();
      } else {
        unpacked_mat.resize(size);
        packedGemmMatrixPtr->unpack(
            unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
        unpacked_mat_ptr = unpacked_mat.data();
      }
      ofstream fout;
      fout.open(weights_out_file);
      if (!fout) {
        LOG(WARNING) << "Can't open output file to dump fp16 weights "
                     << weights_out_file;
        return;
      }
      // Element (i, j) lives at i + nrow * j (column-major buffer).
      // Unsigned indices match nrow/ncol and avoid the signed/unsigned
      // comparison the previous `int` loop counters produced.
      for (uint64_t i = 0; i < nrow; ++i) {
        for (uint64_t j = 0; j < ncol; ++j) {
          if (j > 0) {
            fout << " ";
          }
          fout << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]);
        }
        // '\n' instead of std::endl: no per-row stream flush.
        fout << '\n';
      }
      LOG(INFO) << "Written unpacked blob " << blob_name << " to "
                << weights_out_file;
    },
    pybind11::arg("blob_name"),
    pybind11::arg("weights_out_file"));
m.def(
    "ObserveInt8FCPackedWeights",
    // Dumps an int8 packed FC weight blob to a text file for offline
    // inspection. First line: the quantization params as
    // "scale zero_point" pairs (one pair per channel). Following lines:
    // the original int8 weight matrix, one row per line, space-separated.
    //
    // blob_name:        workspace name of an Int8FCDNNLowPPackedWeightBlob.
    // weights_out_file: path of the text file to write; on open failure a
    //                   warning is logged and nothing is written.
    [](const string& blob_name, const string& weights_out_file) {
      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(gWorkspace);
      const auto* blob = gWorkspace->GetBlob(blob_name);
      if (blob == nullptr) {
        LOG(WARNING) << "Can't find blob " << blob_name;
        return;
      }
      const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
          blob->template Get<Int8FCDNNLowPPackedWeightBlob>();
      auto& qparams = packedInt8Blob.qparams;
      auto& int8_tensor = packedInt8Blob.original_tensor;
      auto shape = int8_tensor.sizes();
      // The row/column indexing below assumes a 2-D FC weight matrix;
      // enforce that instead of reading shape[1] out of bounds.
      CAFFE_ENFORCE(
          shape.size() == 2,
          "Expected a 2-D int8 FC weight tensor in blob ",
          blob_name);
      ofstream fout;
      fout.open(weights_out_file);
      if (!fout) {
        LOG(WARNING) << "Can't open output file to dump int8 weights "
                     << weights_out_file;
        return;
      }
      // Header line: quantization parameters. size_t index fixes the
      // signed/unsigned comparison against qparams.size().
      for (size_t i = 0; i < qparams.size(); ++i) {
        if (i > 0) {
          fout << " ";
        }
        fout << to_string(qparams[i].scale) << " "
             << to_string(qparams[i].zero_point);
      }
      // '\n' instead of std::endl: no needless stream flushes.
      fout << '\n';
      int8_t* int8_data = int8_tensor.data<int8_t>();
      const int64_t nrow = shape[0];
      const int64_t ncol = shape[1];
      // int64_t indices match the tensor extents (previous `int` counters
      // could overflow / trigger signed-mismatch warnings on big tensors).
      for (int64_t i = 0; i < nrow; ++i) {
        for (int64_t j = 0; j < ncol; ++j) {
          if (j > 0) {
            fout << " ";
          }
          fout << to_string(int8_data[i * ncol + j]);
        }
        fout << '\n';
      }
      LOG(INFO) << "Written int8 qparams and weights for " << blob_name
                << " to " << weights_out_file;
    },
    pybind11::arg("blob_name"),
    pybind11::arg("weights_out_file"));
}