Add utils to inspect fp16/int8 packed weights (#32979)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/32979

Since we use prepacked weights in the fp16 FCs and future int8 FCs in production Ads models, we provide Python utils to inspect the unpacked format of the weights for debugging purposes. The main interfaces are the following:

```
from deeplearning.numeric_suite.toolkit import packed_weights_inspector
# inspect fp16 packed weights
unpacked_fp16_weights = packed_weights_inspector.extract_fp16_fc_packed_weights(fp16_weight_blob_name)

# inspect int8 packed weights
unpacked_int8_weights, qparams = packed_weights_inspector.extract_int8_fc_packed_weights(int8_weight_blob_name)
```

Test Plan:
```
buck test mode/opt deeplearning/numeric_suite/toolkit/test:packed_weights_inspector_test
```

Reviewed By: amylittleyang

Differential Revision: D19724474

fbshipit-source-id: e937672b3722e61bc44c2587aab2288a86aece9a
This commit is contained in:
Summer Deng 2020-02-08 18:14:25 -08:00 committed by Facebook Github Bot
parent 6249d7302b
commit e2f1288514

View file

@ -1,9 +1,12 @@
#include <fbgemm/FbgemmFP16.h>
#include <fbgemm/Utils.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "activation_distribution_observer.h"
#include "caffe2/opt/custom/fakefp16_transform.h"
#include "caffe2/quantization/server/fbgemm_pack_blob.h"
#include "caffe2_dnnlowp_utils.h"
#include "quantization_error_minimization.h"
#include "caffe2/opt/custom/fakefp16_transform.h"
namespace caffe2 {
namespace python {
@ -278,4 +281,96 @@ PYBIND11_MODULE(dnnlowp_pybind11, m) {
pybind11::arg("quant_scheme") = "min_max",
pybind11::arg("p99_threshold") = 0.99,
pybind11::arg("is_weight") = false);
m.def(
    "ObserveFp16FCPackedWeights",
    // Dumps the unpacked fp16 weight matrix stored in a packed FC weight
    // blob to a text file (one row per line, space-separated floats) so the
    // packed representation can be inspected offline.
    //
    // blob_name:        workspace name of the blob holding a
    //                   unique_ptr<fbgemm::PackedGemmMatrixFP16>.
    // weights_out_file: path of the text file to write; on open failure a
    //                   warning is logged and nothing is written.
    [](const string& blob_name, const string& weights_out_file) {
      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(gWorkspace);
      const auto* blob = gWorkspace->GetBlob(blob_name);
      CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
      fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
          blob->template Get<unique_ptr<fbgemm::PackedGemmMatrixFP16>>()
              .get();
      // The blob may hold an empty unique_ptr; fail loudly instead of
      // dereferencing null below.
      CAFFE_ENFORCE(
          packedGemmMatrixPtr, "Null packed fp16 matrix in blob ", blob_name);
      const uint64_t nrow = packedGemmMatrixPtr->numRows();
      const uint64_t ncol = packedGemmMatrixPtr->numCols();
      const uint64_t size = nrow * ncol;
      fbgemm::float16* unpacked_mat_ptr = nullptr;
      vector<fbgemm::float16> unpacked_mat;
      if (!packedGemmMatrixPtr->packed()) {
        // Matrix is already in unpacked layout; read its storage directly.
        unpacked_mat_ptr = packedGemmMatrixPtr->pmat();
      } else {
        unpacked_mat.resize(size);
        packedGemmMatrixPtr->unpack(
            unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
        unpacked_mat_ptr = unpacked_mat.data();
      }
      ofstream fout;
      fout.open(weights_out_file);
      if (!fout) {
        LOG(WARNING) << "Can't open output file to dump fp16 weights "
                     << weights_out_file;
        return;
      }
      // Element (i, j) lives at i + nrow * j (column-major buffer).
      // Unsigned indices match nrow/ncol and avoid the signed/unsigned
      // comparison the previous `int` loop counters produced.
      for (uint64_t i = 0; i < nrow; ++i) {
        for (uint64_t j = 0; j < ncol; ++j) {
          if (j > 0) {
            fout << " ";
          }
          fout << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]);
        }
        // '\n' instead of std::endl: no per-row stream flush.
        fout << '\n';
      }
      LOG(INFO) << "Written unpacked blob " << blob_name << " to "
                << weights_out_file;
    },
    pybind11::arg("blob_name"),
    pybind11::arg("weights_out_file"));
m.def(
    "ObserveInt8FCPackedWeights",
    // Dumps an int8 packed FC weight blob to a text file for offline
    // inspection. First line: the quantization params as
    // "scale zero_point" pairs (one pair per channel). Following lines:
    // the original int8 weight matrix, one row per line, space-separated.
    //
    // blob_name:        workspace name of an Int8FCDNNLowPPackedWeightBlob.
    // weights_out_file: path of the text file to write; on open failure a
    //                   warning is logged and nothing is written.
    [](const string& blob_name, const string& weights_out_file) {
      Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(gWorkspace);
      const auto* blob = gWorkspace->GetBlob(blob_name);
      if (blob == nullptr) {
        LOG(WARNING) << "Can't find blob " << blob_name;
        return;
      }
      const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
          blob->template Get<Int8FCDNNLowPPackedWeightBlob>();
      auto& qparams = packedInt8Blob.qparams;
      auto& int8_tensor = packedInt8Blob.original_tensor;
      auto shape = int8_tensor.sizes();
      // The row/column indexing below assumes a 2-D FC weight matrix;
      // enforce that instead of reading shape[1] out of bounds.
      CAFFE_ENFORCE(
          shape.size() == 2,
          "Expected a 2-D int8 FC weight tensor in blob ",
          blob_name);
      ofstream fout;
      fout.open(weights_out_file);
      if (!fout) {
        LOG(WARNING) << "Can't open output file to dump int8 weights "
                     << weights_out_file;
        return;
      }
      // Header line: quantization parameters. size_t index fixes the
      // signed/unsigned comparison against qparams.size().
      for (size_t i = 0; i < qparams.size(); ++i) {
        if (i > 0) {
          fout << " ";
        }
        fout << to_string(qparams[i].scale) << " "
             << to_string(qparams[i].zero_point);
      }
      // '\n' instead of std::endl: no needless stream flushes.
      fout << '\n';
      int8_t* int8_data = int8_tensor.data<int8_t>();
      const int64_t nrow = shape[0];
      const int64_t ncol = shape[1];
      // int64_t indices match the tensor extents (previous `int` counters
      // could overflow / trigger signed-mismatch warnings on big tensors).
      for (int64_t i = 0; i < nrow; ++i) {
        for (int64_t j = 0; j < ncol; ++j) {
          if (j > 0) {
            fout << " ";
          }
          fout << to_string(int8_data[i * ncol + j]);
        }
        fout << '\n';
      }
      LOG(INFO) << "Written int8 qparams and weights for " << blob_name
                << " to " << weights_out_file;
    },
    pybind11::arg("blob_name"),
    pybind11::arg("weights_out_file"));
}