pytorch/caffe2/quantization/server/quantization_error_minimization.h
Summer Deng ee68c512c5 Add P99 method with configurable thresholds
Summary:
Update the P99 quantization method with configurable thresholds.
Add dnnlowp options for the configuration.

Test Plan: buck run mode/opt experimental/summerdeng/xray_image:test_net_quantization -- --model_path=/mnt/public/summerdeng/xray_image/models/oct_resnext101_50_2B_pretrained.mdl --batch_size=1 --test_max_images=100 --octave_conv --octave_conv_ratio=0.5 --output_dir=/mnt/public/summerdeng/xray_image/output --quantize --histogram_file=/mnt/public/summerdeng/xray_image/activation_histograms/oct_resnext101_50_2B_pretrained_hist_200k_compiled.txt --int8_model_type="mdl" --int8_model_mdl_name="int8_oct_resnext101_50_2B_l2_nongroupwise.mdl" --skip_first_conv --weight_quant="l2" --activation_quant="p99" --activation_p99_threshold=0.999 --measure_quantization_error

Reviewed By: amylittleyang

Differential Revision: D16626158

fbshipit-source-id: 7718dcf429f73aa54e82a6b6f6e631d94e3a134c
2019-09-27 15:53:20 -07:00

57 lines
1.3 KiB
C++

#pragma once
#include "dnnlowp.h"
namespace dnnlowp {
/**
 * Abstract interface for strategies that derive quantization parameters
 * from a histogram.
 *
 * NOTE(review): the class name suggests that implementations choose the
 * parameters so as to minimize some quantization-error measure; the actual
 * criteria are defined by the derived classes, outside this declaration.
 */
class QuantizationErrorMinimization {
 public:
  /**
   * Produces quantization parameters for the data summarized by `hist`.
   *
   * @param hist histogram the parameters are derived from
   * @param preserve_sparsity defaults to false; presumably asks that zero
   *        stays exactly representable -- confirm against implementations
   * @param precision defaults to 8; presumably the target bit width --
   *        confirm against implementations
   *
   * Default arguments of a virtual function are taken from the static type
   * used at the call site, not from the dynamic type of the object.
   */
  virtual TensorQuantizationParams ChooseQuantizationParams(
      const Histogram& hist,
      bool preserve_sparsity = false,
      int precision = 8) = 0;

  /// Virtual so that implementations can be destroyed through this interface.
  virtual ~QuantizationErrorMinimization() = default;
};
/**
 * QuantizationErrorMinimization implementation parameterized by a norm
 * selector (`Kind`) fixed at construction time.
 *
 * NOTE(review): presumably the selected norm is the error measure being
 * minimized; the member function definitions are not part of this header.
 */
class NormMinimization : public QuantizationErrorMinimization {
 public:
  /// Norm selector stored in `kind_`.
  enum Kind { L1, L2 };

  // Left implicit on purpose: making it `explicit` would reject existing
  // code that converts a Kind to a NormMinimization.
  NormMinimization(Kind kind) : kind_(kind) {}

  /**
   * Faster approximate search.
   *
   * Takes the same arguments, with the same defaults, as
   * ChooseQuantizationParams, but is a non-virtual member of this class.
   */
  TensorQuantizationParams NonlinearQuantizationParamsSearch(
      const Histogram& hist,
      bool preserve_sparsity = false,
      int precision = 8);

  /**
   * Overrides the base-class entry point; the defaults repeat those of the
   * base declaration (preserve_sparsity = false, precision = 8).
   */
  TensorQuantizationParams ChooseQuantizationParams(
      const Histogram& hist,
      bool preserve_sparsity = false,
      int precision = 8) override;

 protected:
  /// Norm selected by the constructor; accessible to derived classes.
  Kind kind_;
};
class L1ErrorMinimization : public NormMinimization {
public:
L1ErrorMinimization() : NormMinimization(L1) {}
};
/**
 * QuantizationErrorMinimization implementation configured by a single
 * floating-point threshold.
 *
 * NOTE(review): the name and the 0.99 default suggest a percentile-style
 * criterion (keep roughly that fraction of the histogram mass); confirm
 * against the definition of ChooseQuantizationParams, which is not part of
 * this header.
 */
class P99 : public QuantizationErrorMinimization {
 public:
  // Threshold supplied at construction. Kept public (despite the trailing
  // underscore) because external code may already read or write it.
  float threshold_;

  /**
   * @param p99_threshold value stored in `threshold_`; defaults to 0.99f.
   *
   * The default is spelled as a float literal so that no implicit
   * double -> float narrowing happens in the default argument.
   * The constructor stays implicit to remain source compatible.
   */
  P99(float p99_threshold = 0.99f) : threshold_(p99_threshold) {}

  /**
   * Overrides the base-class entry point.
   *
   * Unlike the base declaration, `preserve_sparsity` defaults to true here.
   * Default arguments are resolved from the static type at the call site, so
   * this default only applies when the call is made through a P99-typed
   * expression, not through a QuantizationErrorMinimization reference or
   * pointer.
   */
  TensorQuantizationParams ChooseQuantizationParams(
      const Histogram& hist,
      bool preserve_sparsity = true,
      int precision = 8) override;
}; // class P99
} // namespace dnnlowp