mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Update the P99 quantization method with configurable thresholds. Add dnnlowp options for the configuration. Test Plan: buck run mode/opt experimental/summerdeng/xray_image:test_net_quantization -- --model_path=/mnt/public/summerdeng/xray_image/models/oct_resnext101_50_2B_pretrained.mdl --batch_size=1 --test_max_images=100 --octave_conv --octave_conv_ratio=0.5 --output_dir=/mnt/public/summerdeng/xray_image/output --quantize --histogram_file=/mnt/public/summerdeng/xray_image/activation_histograms/oct_resnext101_50_2B_pretrained_hist_200k_compiled.txt --int8_model_type="mdl" --int8_model_mdl_name="int8_oct_resnext101_50_2B_l2_nongroupwise.mdl" --skip_first_conv --weight_quant="l2" --activation_quant="p99" --activation_p99_threshold=0.999 --measure_quantization_error Reviewed By: amylittleyang Differential Revision: D16626158 fbshipit-source-id: 7718dcf429f73aa54e82a6b6f6e631d94e3a134c
57 lines
1.3 KiB
C++
57 lines
1.3 KiB
C++
#pragma once
|
|
|
|
#include "dnnlowp.h"
|
|
|
|
namespace dnnlowp {
|
|
|
|
class QuantizationErrorMinimization {
|
|
public:
|
|
virtual TensorQuantizationParams ChooseQuantizationParams(
|
|
const Histogram& hist,
|
|
bool preserve_sparsity = false,
|
|
int precision = 8) = 0;
|
|
virtual ~QuantizationErrorMinimization(){};
|
|
};
|
|
|
|
class NormMinimization : public QuantizationErrorMinimization {
|
|
public:
|
|
enum Kind {
|
|
L1,
|
|
L2,
|
|
};
|
|
|
|
NormMinimization(Kind kind) : kind_(kind) {}
|
|
|
|
/**
|
|
* Faster approximate search
|
|
*/
|
|
TensorQuantizationParams NonlinearQuantizationParamsSearch(
|
|
const Histogram& hist,
|
|
bool preserve_sparsity = false,
|
|
int precision = 8);
|
|
|
|
TensorQuantizationParams ChooseQuantizationParams(
|
|
const Histogram& hist,
|
|
bool preserve_sparsity = false,
|
|
int precision = 8) override;
|
|
|
|
protected:
|
|
Kind kind_;
|
|
};
|
|
|
|
class L1ErrorMinimization : public NormMinimization {
|
|
public:
|
|
L1ErrorMinimization() : NormMinimization(L1) {}
|
|
};
|
|
|
|
class P99 : public QuantizationErrorMinimization {
|
|
public:
|
|
float threshold_;
|
|
P99(float p99_threshold = 0.99) : threshold_(p99_threshold) {}
|
|
TensorQuantizationParams ChooseQuantizationParams(
|
|
const Histogram& hist,
|
|
bool preserve_sparsity = true,
|
|
int precision = 8) override;
|
|
}; // class P99QuantizationFactory
|
|
|
|
} // namespace dnnlowp
|