From e2f12885140c36c1d5bf82de6eb47797856fdacd Mon Sep 17 00:00:00 2001 From: Summer Deng Date: Sat, 8 Feb 2020 18:14:25 -0800 Subject: [PATCH] Add utils to inspect fp16/int8 packed weights (#32979) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/32979 Since we use prepacked weights in the Fp16 FCs and future Int8 FCs in production Ads models, we provide Python utils to inspect the unpacked format of the weights for debugging purposes. The main interfaces are the following: ``` from deeplearning.numeric_suite.toolkit import packed_weights_inspector # inspect fp16 packed weights unpacked_fp16_weights = packed_weights_inspector.extract_fp16_fc_packed_weights(fp16_weight_blob_name) # inspect int8 packed weights unpacked_int8_weights, qparams = packed_weights_inspector.extract_int8_fc_packed_weights(int8_weight_blob_name) ``` Test Plan: ``` buck test mode/opt deeplearning/numeric_suite/toolkit/test:packed_weights_inspector_test ``` Reviewed By: amylittleyang Differential Revision: D19724474 fbshipit-source-id: e937672b3722e61bc44c2587aab2288a86aece9a --- caffe2/quantization/server/pybind.cc | 97 +++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/caffe2/quantization/server/pybind.cc b/caffe2/quantization/server/pybind.cc index c012f0f3961..bdf828f7745 100644 --- a/caffe2/quantization/server/pybind.cc +++ b/caffe2/quantization/server/pybind.cc @@ -1,9 +1,12 @@ +#include +#include #include #include #include "activation_distribution_observer.h" +#include "caffe2/opt/custom/fakefp16_transform.h" +#include "caffe2/quantization/server/fbgemm_pack_blob.h" #include "caffe2_dnnlowp_utils.h" #include "quantization_error_minimization.h" -#include "caffe2/opt/custom/fakefp16_transform.h" namespace caffe2 { namespace python { @@ -278,4 +281,96 @@ PYBIND11_MODULE(dnnlowp_pybind11, m) { pybind11::arg("quant_scheme") = "min_max", pybind11::arg("p99_threshold") = 0.99, pybind11::arg("is_weight") = false); + m.def( +
"ObserveFp16FCPackedWeights", // Binding name: dumps the fp16 matrix held by workspace blob `blob_name` to a text file, one matrix row per line.
+      [](const string& blob_name, const string& weights_out_file) { // blob_name: blob to look up; weights_out_file: path opened for writing.
+        Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
+        CAFFE_ENFORCE(gWorkspace); // Enforce that a current workspace is available before using it.
+        const auto* blob = gWorkspace->GetBlob(blob_name);
+        CAFFE_ENFORCE(blob, "Can't find blob ", blob_name); // A null blob trips the enforce here; the int8 binding below logs a warning and returns instead.
+        fbgemm::PackedGemmMatrixFP16* packedGemmMatrixPtr =
+            blob->template Get>() // NOTE(review): template argument list looks lost in extraction; `.get()` yields a PackedGemmMatrixFP16*, so presumably a smart pointer to that type - confirm upstream.
+                .get();
+        uint64_t nrow = packedGemmMatrixPtr->numRows();
+        uint64_t ncol = packedGemmMatrixPtr->numCols();
+        uint64_t size = nrow * ncol; // Element count used to size the unpack buffer.
+        fbgemm::float16* unpacked_mat_ptr = nullptr; // Points at whichever buffer the dump loop reads from.
+        vector unpacked_mat; // NOTE(review): element type looks stripped; `.data()` is assigned to a fbgemm::float16*, so presumably vector<fbgemm::float16> - confirm upstream.
+
+        if (!packedGemmMatrixPtr->packed()) {
+          unpacked_mat_ptr = packedGemmMatrixPtr->pmat(); // Not packed: read the matrix's own buffer directly.
+        } else {
+          unpacked_mat.resize(size);
+          packedGemmMatrixPtr->unpack( // Packed: unpack into the local buffer, passing matrix_op_t::Transpose.
+              unpacked_mat.data(), fbgemm::matrix_op_t::Transpose);
+          unpacked_mat_ptr = unpacked_mat.data();
+        }
+        ofstream fout;
+        fout.open(weights_out_file);
+        if (!fout) { // Open failure: log a warning and return without writing anything.
+          LOG(WARNING) << "Can't open output file to dump fp16 weights "
+                       << weights_out_file;
+          return;
+        }
+        for (int i = 0; i < nrow; ++i) { // NOTE(review): int counters are compared against uint64_t bounds in both loops.
+          for (int j = 0; j < ncol; ++j) {
+            if (j > 0) {
+              fout << " "; // Single-space separator between values on a row.
+            }
+            fout << fbgemm::cpu_half2float(unpacked_mat_ptr[i + nrow * j]); // Element (i, j) is read at offset i + nrow * j and converted via cpu_half2float before writing.
+          }
+          fout << endl; // End of output row i.
+        }
+        LOG(INFO) << "Written unpacked blob " << blob_name << " to "
+                  << weights_out_file;
+      },
+      pybind11::arg("blob_name"), // Keyword-argument names exposed to Python.
+      pybind11::arg("weights_out_file"));
+  m.def(
+      "ObserveInt8FCPackedWeights", // Binding name: dumps qparams (first line) and the int8 weights (one row per line) of blob `blob_name` to a text file.
+      [](const string& blob_name, const string& weights_out_file) { // blob_name: blob to look up; weights_out_file: path opened for writing.
+        Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
+        CAFFE_ENFORCE(gWorkspace); // Enforce that a current workspace is available before using it.
+        const auto* blob = gWorkspace->GetBlob(blob_name);
+        if (blob == nullptr) { // Null blob: warn and return (the fp16 binding above uses CAFFE_ENFORCE for the same condition).
+          LOG(WARNING) << "Can't find blob " << blob_name;
+          return;
+        }
+        const Int8FCDNNLowPPackedWeightBlob& packedInt8Blob =
+            blob->template Get(); // NOTE(review): template argument looks stripped; presumably Get<Int8FCDNNLowPPackedWeightBlob>() given the reference type - confirm upstream.
+        auto& qparams = packedInt8Blob.qparams; // Indexable collection; each entry exposes .scale and .zero_point (see the loop below).
+        auto& int8_tensor = packedInt8Blob.original_tensor;
+
+        auto shape = int8_tensor.sizes(); // shape[0] and shape[1] bound the dump loops below.
+
+        ofstream fout;
+        fout.open(weights_out_file);
+        if (!fout) { // Open failure: log a warning and return without writing anything.
+          LOG(WARNING) << "Can't open output file to dump int8 weights "
+                       << weights_out_file;
+          return;
+        }
+        for (int i = 0; i < qparams.size(); ++i) { // First output line: "scale zero_point" pairs for every qparams entry, space-separated.
+          if (i > 0) {
+            fout << " ";
+          }
+          fout << to_string(qparams[i].scale) << " " // NOTE(review): if this is std::to_string on a floating-point scale, it prints fixed notation with 6 decimals - confirm that precision is sufficient.
+               << to_string(qparams[i].zero_point);
+        }
+        fout << endl; // Terminates the qparams line.
+        int8_t* int8_data = int8_tensor.data(); // NOTE(review): template argument looks stripped; presumably data<int8_t>() given the int8_t* target - confirm upstream.
+        for (int i = 0; i < shape[0]; ++i) { // NOTE(review): assumes the tensor has at least two dimensions; no rank check is visible here.
+          for (int j = 0; j < shape[1]; ++j) {
+            if (j > 0) {
+              fout << " "; // Single-space separator between values on a row.
+            }
+            fout << to_string(int8_data[i * shape[1] + j]); // Element (i, j) is read at offset i * shape[1] + j; to_string emits it as text rather than streaming the int8_t directly.
+          }
+          fout << endl; // End of output row i.
+        }
+        LOG(INFO) << "Written int8 qparams and weights for " << blob_name
+                  << " to " << weights_out_file;
+      },
+      pybind11::arg("blob_name"), // Keyword-argument names exposed to Python.
+      pybind11::arg("weights_out_file"));
 }