mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-26 03:00:54 +00:00
Lochi/refactor yolov3 quantization (#6290)
* Refactor the code and move data reader, preprocessing, evaluation to E2E_example_mode * Refactor the code. Move data reader, preprocessing, evaluation to model specific example under E2E_example_mode * refactor code * Move yolov3 example to specific folder and add additional pre/post processing
This commit is contained in:
parent
a36f627a4c
commit
7c5bfbaaab
6 changed files with 525 additions and 45 deletions
|
|
@ -1,5 +1,5 @@
|
|||
from onnxruntime.quantization import CalibrationDataReader
|
||||
from preprocessing import yolov3_preprocess_func, yolov3_vision_preprocess_func
|
||||
from preprocessing import yolov3_preprocess_func, yolov3_variant_preprocess_func
|
||||
import onnxruntime
|
||||
from argparse import Namespace
|
||||
import os
|
||||
|
|
@ -165,7 +165,7 @@ class YoloV3DataReader(ObejctDetectionDataReader):
|
|||
return batches
|
||||
|
||||
|
||||
class YoloV3VisionDataReader(YoloV3DataReader):
|
||||
class YoloV3VariantDataReader(YoloV3DataReader):
|
||||
def __init__(self,
|
||||
calibration_image_folder,
|
||||
width=608,
|
||||
|
|
@ -179,14 +179,17 @@ class YoloV3VisionDataReader(YoloV3DataReader):
|
|||
annotations='./annotations/instances_val2017.json'):
|
||||
YoloV3DataReader.__init__(self, calibration_image_folder, width, height, start_index, end_index, stride,
|
||||
batch_size, model_path, is_evaluation, annotations)
|
||||
self.input_name = 'images'
|
||||
self.input_name = '000_net'
|
||||
# self.input_name = 'images'
|
||||
|
||||
def load_serial(self):
|
||||
width = self.width
|
||||
height = self.height
|
||||
input_name = self.input_name
|
||||
nchw_data_list, filename_list, image_size_list = yolov3_vision_preprocess_func(
|
||||
nchw_data_list, filename_list, image_size_list = yolov3_variant_preprocess_func(
|
||||
self.image_folder, height, width, self.start_index, self.stride)
|
||||
# nchw_data_list, filename_list, image_size_list = yolov3_variant_2_preprocess_func(
|
||||
# self.image_folder, height, width, self.start_index, self.stride)
|
||||
|
||||
data = []
|
||||
if self.is_evaluation:
|
||||
|
|
|
|||
|
|
@ -1,29 +1,7 @@
|
|||
import os
|
||||
from onnxruntime.quantization import get_calibrator, write_calibration_table, generate_calibration_table
|
||||
from data_reader import YoloV3DataReader, YoloV3VisionDataReader
|
||||
from evaluate import YoloV3Evaluator, YoloV3VisionEvaluator
|
||||
from dataset_utils import *
|
||||
|
||||
|
||||
def get_prediction_evaluation(model_path, validation_dataset, providers):
    """Run YOLOv3 inference over the validation set and evaluate the result.

    Args:
        model_path: path of the ONNX model, given to both the data reader
            and the evaluator.
        validation_dataset: location of the validation images
            (e.g. ``'./val2017'``).
        providers: execution providers forwarded to ``YoloV3Evaluator``.
    """
    annotation_file = './annotations/instances_val2017.json'
    # annotation_file = './annotations/instances_val2017_person.json'

    reader = YoloV3DataReader(validation_dataset,
                              stride=1000,
                              batch_size=1,
                              model_path=model_path,
                              is_evaluation=True)
    yolo_evaluator = YoloV3Evaluator(model_path, reader, providers=providers)

    # Alternative reader/evaluator pair kept from the original for reference:
    # reader = YoloV3VisionDataReader(validation_dataset, width=608, height=384, stride=1000, batch_size=1, model_path=model_path, is_evaluation=True)
    # yolo_evaluator = YoloV3VisionEvaluator(model_path, reader, width=608, height=384, providers=providers)

    yolo_evaluator.predict()
    predictions = yolo_evaluator.get_result()

    # The raw predictions are printed before they are scored.
    print(predictions)
    yolo_evaluator.evaluate(predictions, annotation_file)
|
||||
|
||||
from data_reader import YoloV3DataReader, YoloV3VariantDataReader
|
||||
from evaluate import YoloV3Evaluator, YoloV3VariantEvaluator
|
||||
|
||||
def get_calibration_table(model_path, augmented_model_path, calibration_dataset):
|
||||
|
||||
|
|
@ -62,27 +40,101 @@ def get_calibration_table(model_path, augmented_model_path, calibration_dataset)
|
|||
'''
|
||||
|
||||
# data_reader = YoloV3DataReader(calibration_dataset, stride=1000, batch_size=20, model_path=augmented_model_path)
|
||||
# data_reader = YoloV3VisionDataReader(calibration_dataset, width=512, height=288, stride=1000, batch_size=20, model_path=augmented_model_path)
|
||||
# data_reader = YoloV3VisionDataReader(calibration_dataset, width=608, height=384, stride=1000, batch_size=20, model_path=augmented_model_path)
|
||||
# calibrator.set_data_reader(data_reader)
|
||||
# generate_calibration_table(calibrator, model_path, augmented_model_path, True, data_reader)
|
||||
|
||||
write_calibration_table(calibrator.get_calibration_cache())
|
||||
print('calibration table generated and saved.')
|
||||
|
||||
def get_prediction_evaluation(model_path, validation_dataset, providers):
    """Run YOLOv3 inference over the validation set and evaluate the result.

    Args:
        model_path: path of the ONNX model, given to both the data reader
            and the evaluator.
        validation_dataset: location of the validation images
            (e.g. ``'./val2017'``).
        providers: execution providers forwarded to ``YoloV3Evaluator``.
    """
    annotation_file = './annotations/instances_val2017.json'

    reader = YoloV3DataReader(validation_dataset,
                              stride=1000,
                              batch_size=1,
                              model_path=model_path,
                              is_evaluation=True)
    yolo_evaluator = YoloV3Evaluator(model_path, reader, providers=providers)

    yolo_evaluator.predict()
    predictions = yolo_evaluator.get_result()

    # The raw predictions are printed before they are scored.
    print(predictions)
    yolo_evaluator.evaluate(predictions, annotation_file)
|
||||
|
||||
def get_calibration_table_yolov3_variant(model_path, augmented_model_path, calibration_dataset):
    """Build and save the calibration table for a YOLOv3 variant model.

    Args:
        model_path: path of the ONNX model to calibrate.
        augmented_model_path: path of the augmented model; it is given to the
            calibrator and used as the model path of every data reader.
        calibration_dataset: folder holding the calibration images; its entry
            count determines how many chunks are processed.
    """
    calibrator = get_calibrator(model_path, None, augmented_model_path=augmented_model_path)

    # A DataReader can handle the dataset with batch or serial processing,
    # depending on its implementation. The two approaches below show both ways
    # of generating the calibration table.
    #
    # 1. Use serial processing
    #
    # One DataReader could do all of the serial processing, but some machines
    # don't have sufficient memory to hold all dataset images and all
    # intermediate output. So let multiple DataReaders handle different strides
    # of the dataset one by one.
    # DataReader will use serial processing when batch_size is 1.
    total_data_size = len(os.listdir(calibration_dataset))
    stride = 25
    # The loop variable is the chunk's start index, so no separate counter
    # has to be kept in sync with it.
    for start_index in range(0, total_data_size, stride):
        data_reader = YoloV3VariantDataReader(calibration_dataset,
                                              width=608,
                                              height=608,
                                              start_index=start_index,
                                              end_index=start_index + stride,
                                              stride=stride,
                                              batch_size=1,
                                              model_path=augmented_model_path)
        calibrator.set_data_reader(data_reader)
        generate_calibration_table(calibrator, model_path, augmented_model_path, False, data_reader)

    # 2. Use batch processing (much faster)
    #
    # Batch processing requires less memory for intermediate output, so a
    # single DataReader can handle the dataset in batches. If that still runs
    # out of memory, several DataReaders can split the work just like the
    # serial approach above.
    # DataReader will use batch processing when batch_size > 1.
    #
    # data_reader = YoloV3VariantDataReader(calibration_dataset, width=608, height=608, stride=1000, batch_size=20, model_path=augmented_model_path)
    # calibrator.set_data_reader(data_reader)
    # generate_calibration_table(calibrator, model_path, augmented_model_path, True, data_reader)

    write_calibration_table(calibrator.get_calibration_cache())
    print('calibration table generated and saved.')
|
||||
|
||||
def get_prediction_evaluation_yolov3_variant(model_path, validation_dataset, providers):
    """Run a YOLOv3 variant model over the validation set and evaluate it.

    Both the data reader and the evaluator are configured for a 608x608
    network input.

    Args:
        model_path: path of the ONNX model, given to both the data reader
            and the evaluator.
        validation_dataset: location of the validation images
            (e.g. ``'./val2017'``).
        providers: execution providers forwarded to ``YoloV3VariantEvaluator``.
    """
    annotation_file = './annotations/instances_val2017.json'

    reader = YoloV3VariantDataReader(validation_dataset,
                                     width=608,
                                     height=608,
                                     stride=1000,
                                     batch_size=1,
                                     model_path=model_path,
                                     is_evaluation=True)
    variant_evaluator = YoloV3VariantEvaluator(model_path,
                                               reader,
                                               width=608,
                                               height=608,
                                               providers=providers)

    variant_evaluator.predict()
    detections = variant_evaluator.get_result()

    # The raw detections are printed before they are scored.
    print(detections)
    variant_evaluator.evaluate(detections, annotation_file)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
model_path = 'yolov3_new.onnx'
|
||||
# model_path = 'yolov3_288x512_batch_nms.onnx'
|
||||
# model_path = 'yolov3_384x608_batch_nms.onnx'
|
||||
|
||||
yolov3 = 'model zoo'
|
||||
augmented_model_path = 'augmented_model.onnx'
|
||||
|
||||
calibration_dataset = './test2017'
|
||||
|
||||
validation_dataset = './val2017'
|
||||
# validation_dataset = './val2017person'
|
||||
|
||||
get_calibration_table(model_path, augmented_model_path, calibration_dataset)
|
||||
get_prediction_evaluation(model_path, validation_dataset, ["TensorrtExecutionProvider"])
|
||||
if yolov3 == 'model zoo':
|
||||
# ONNX Model Zoo yolov3
|
||||
model_path = 'yolov3.onnx'
|
||||
get_calibration_table(model_path, augmented_model_path, calibration_dataset)
|
||||
get_prediction_evaluation(model_path, validation_dataset, ["TensorrtExecutionProvider"])
|
||||
else:
|
||||
# Yolov3 variants from here
|
||||
# https://github.com/jkjung-avt/tensorrt_demos.git
|
||||
model_path = 'yolov3-608.onnx'
|
||||
get_calibration_table_yolov3_variant(model_path, augmented_model_path, calibration_dataset)
|
||||
get_prediction_evaluation_yolov3_variant(model_path, validation_dataset, ["TensorrtExecutionProvider"])
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -184,8 +184,76 @@ class YoloV3Evaluator:
|
|||
cocoEval.accumulate()
|
||||
cocoEval.summarize()
|
||||
|
||||
class YoloV3VariantEvaluator(YoloV3Evaluator):
    """Evaluator for YOLOv3 variant models whose raw outputs need decoding.

    ``predict`` runs the model on everything the data reader yields, decodes
    the raw outputs with ``PostprocessYOLOWrapper`` and appends one record per
    detection to ``self.prediction_result_list``.
    """

    def __init__(self, model_path,
                 data_reader: CalibrationDataReader,
                 width=608,
                 height=384,
                 providers=["CUDAExecutionProvider"],
                 ground_truth_object_class_file="./coco-object-categories-2017.json",
                 onnx_object_class_file="./onnx_coco_classes.txt"):
        """Forward all arguments unchanged to ``YoloV3Evaluator.__init__``."""
        # Hand the parent its own copy so the shared default list can never be
        # mutated through an instance.
        YoloV3Evaluator.__init__(self, model_path, data_reader, width, height, list(providers),
                                 ground_truth_object_class_file, onnx_object_class_file)

    def predict(self):
        """Run inference on every batch from the data reader and collect detections.

        Each detection is appended to ``self.prediction_result_list`` as a dict
        with the keys ``image_id``, ``category_id``, ``bbox`` ([x, y, w, h])
        and ``score``.
        """
        from postprocessing import PostprocessYOLOWrapper

        session = onnxruntime.InferenceSession(self.model_path, providers=self.providers)
        # NOTE(review): the post-processor resolution is fixed at 608x608 and
        # does not follow the width/height passed to __init__ -- confirm this
        # is intended for non-square variants.
        postprocess_yolo = PostprocessYOLOWrapper('yolov3', (608, 608))

        outputs = []
        image_size_batch = []
        image_id_batch = []

        while True:
            inputs = self.data_reader.get_next()
            if not inputs:
                break

            # The bookkeeping entries are not model inputs, so take them out
            # of the feed before it is handed to the session.
            image_sizes = inputs["image_size"]
            image_ids = inputs["image_id"]
            del inputs["image_size"]
            del inputs["image_id"]

            # With a batch size of 1 the reader yields a bare id instead of a list.
            if isinstance(image_ids, int):
                image_sizes = [image_sizes]
                image_ids = [image_ids]

            image_size_batch.append(image_sizes)
            image_id_batch.append(image_ids)
            outputs.append(session.run(None, inputs))

        batch_size = self.data_reader.get_batch_size()
        for output, image_sizes, image_ids in zip(outputs, image_size_batch, image_id_batch):
            # A batch may hold fewer entries than the nominal batch size.
            entry_count = min(batch_size, len(image_sizes), len(image_ids))

            for batch_i in range(entry_count):
                image_height = image_sizes[batch_i][0]
                image_width = image_sizes[batch_i][1]
                image_id = image_ids[batch_i]

                boxes, classes, scores = postprocess_yolo.postprocessor.process(
                    output, (image_width, image_height), 0.01)

                for box, class_index, score in zip(boxes, classes, scores):
                    class_name = self.onnx_class_list[int(class_index)]
                    if class_name in self.identical_class_map:
                        class_name = self.identical_class_map[class_name]
                    category_id = self.class_to_id[class_name]

                    # Convert the (x1, y1, x2, y2) box into [x, y, w, h].
                    x = float(box[0])
                    y = float(box[1])
                    w = float(box[2] - box[0] + 1)
                    h = float(box[3] - box[1] + 1)
                    self.prediction_result_list.append({
                        "image_id": int(image_id),
                        "category_id": int(category_id),
                        "bbox": [x, y, w, h],
                        # plain Python float, like the bbox values
                        "score": float(score),
                    })
|
||||
|
||||
class YoloV3Variant2Evaluator(YoloV3Evaluator):
|
||||
def __init__(self,
|
||||
model_path,
|
||||
data_reader: CalibrationDataReader,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,296 @@
|
|||
import numpy as np
|
||||
class PostprocessYOLO(object):
    """Post-process the raw YOLO output tensors into boxes, classes and scores."""

    def __init__(self,
                 yolo_masks,
                 yolo_anchors,
                 nms_threshold,
                 yolo_input_resolution,
                 category_num=80):
        """Initialize with all values that will be kept when processing
        several frames. Assuming 3 outputs of the network in the case
        of (large) YOLO, or 2 for the Tiny YOLO.

        Keyword arguments:
        yolo_masks -- a list of 3 (or 2) three-dimensional tuples for the YOLO masks
        yolo_anchors -- a list of 9 (or 6) two-dimensional tuples for the YOLO anchors
        nms_threshold -- threshold for non-max suppression algorithm,
        float value between 0 and 1
        yolo_input_resolution -- tuple (H, W) of the target network input;
        it is stored swapped, as (W, H), in ``self.input_wh``
        category_num -- number of output categories/classes
        """
        self.masks = yolo_masks
        self.anchors = yolo_anchors
        self.nms_threshold = nms_threshold
        self.input_wh = (yolo_input_resolution[1], yolo_input_resolution[0])
        self.category_num = category_num

    def process(self, outputs, resolution_raw, conf_th):
        """Take the YOLO outputs generated from a forward pass, post-process them
        and return the bounding boxes of the detected objects together with their
        categories and their confidences as three separate arrays.

        Keyword arguments:
        outputs -- raw network outputs in NCHW format, one array per output scale
        resolution_raw -- the original spatial resolution of the input image in WH order
        conf_th -- confidence threshold, e.g. 0.3

        Returns (boxes, categories, confidences); boxes is an integer array of
        shape (N, 4) holding (x1, y1, x2, y2) clipped to the image.
        """
        outputs_reshaped = [self._reshape_output(output) for output in outputs]

        boxes_xywh, categories, confidences = self._process_yolo_output(
            outputs_reshaped, resolution_raw, conf_th)

        if len(boxes_xywh) == 0:
            # The builtin ``int`` replaces the ``np.int`` alias, which was
            # removed in NumPy 1.24; the resulting dtype is the same.
            return np.zeros((0, 4), dtype=int), categories, confidences

        # convert (x, y, width, height) to (x1, y1, x2, y2)
        img_w, img_h = resolution_raw
        top_left = boxes_xywh[:, 0:2]
        bottom_right = top_left + boxes_xywh[:, 2:4]
        # + 0.5 so that the integer truncation below rounds to nearest
        boxes = np.concatenate([top_left, bottom_right], axis=1) + 0.5
        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0., float(img_w - 1))
        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0., float(img_h - 1))

        return boxes.astype(int), categories, confidences

    def _reshape_output(self, output):
        """Reshape one network output from NCHW to NHWC format and then return it
        in (height, width, 3, 5 + category_num) dimensionality.

        The batch axis is folded away by the final reshape, so this only works
        for a batch of one image.

        Keyword argument:
        output -- an output of the network after inference
        """
        output = np.transpose(output, [0, 2, 3, 1])
        _, height, width, _ = output.shape
        # 4 box parameters + 1 objectness score + one score per category
        params_per_anchor = 4 + 1 + self.category_num
        return np.reshape(output, (height, width, 3, params_per_anchor))

    def _process_yolo_output(self, outputs_reshaped, resolution_raw, conf_th):
        """Take in a list of reshaped YOLO outputs in (height,width,3,85) shape and
        return the bounding boxes of the detected objects together with their
        categories and their confidences as three separate arrays.

        Keyword arguments:
        outputs_reshaped -- list of reshaped YOLO outputs as NumPy arrays
        with shape (height,width,3,85)
        resolution_raw -- the original spatial resolution of the input image in WH order
        conf_th -- confidence threshold
        """
        # E.g. in YOLOv3-608, there are three output tensors, which we associate with their
        # respective masks. Then we iterate through all output-mask pairs and generate candidates
        # for bounding boxes, their corresponding category predictions and their confidences:
        boxes, categories, confidences = list(), list(), list()
        for output, mask in zip(outputs_reshaped, self.masks):
            box, box_confidence, class_probs = self._process_feats(output, mask)
            box, category, confidence = self._filter_boxes(
                box, box_confidence, class_probs, conf_th)
            boxes.append(box)
            categories.append(category)
            confidences.append(confidence)

        boxes = np.concatenate(boxes)
        categories = np.concatenate(categories)
        confidences = np.concatenate(confidences)

        # Scale boxes back to original image shape:
        width, height = resolution_raw
        image_dims = [width, height, width, height]
        boxes = boxes * image_dims

        # Using the candidates from the previous (loop) step, we apply the non-max suppression
        # algorithm that clusters adjacent bounding boxes to a single bounding box.
        # NMS is run separately for every category that has at least one candidate:
        nms_boxes, nms_categories, nscores = list(), list(), list()
        for category_id in set(categories):
            idxs = np.where(categories == category_id)
            category_boxes = boxes[idxs]
            category_labels = categories[idxs]
            category_scores = confidences[idxs]

            keep = self._nms_boxes(category_boxes, category_scores)

            nms_boxes.append(category_boxes[keep])
            nms_categories.append(category_labels[keep])
            nscores.append(category_scores[keep])

        # The three lists are filled together, so checking one is enough.
        if not nms_boxes:
            return (np.empty((0, 4), dtype=np.float32),
                    np.empty((0, 1), dtype=np.float32),
                    np.empty((0, 1), dtype=np.float32))

        boxes = np.concatenate(nms_boxes)
        categories = np.concatenate(nms_categories)
        confidences = np.concatenate(nscores)

        return boxes, categories, confidences

    def _process_feats(self, output_reshaped, mask):
        """Take in a reshaped YOLO output in height,width,3,85 format together with its
        corresponding YOLO mask and return the detected bounding boxes, the confidence,
        and the class probability in each cell/pixel.

        Keyword arguments:
        output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85)
        mask -- tuple with the indexes into ``self.anchors`` used by this output
        """

        def sigmoid_v(array):
            return np.reciprocal(np.exp(-array) + 1.0)

        grid_h, grid_w, _, _ = output_reshaped.shape

        anchors = [self.anchors[i] for i in mask]

        # Reshape to height, width, num_anchors, box_params:
        anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])
        box_xy = sigmoid_v(output_reshaped[..., 0:2])
        box_wh = np.exp(output_reshaped[..., 2:4]) * anchors_tensor
        box_confidence = sigmoid_v(output_reshaped[..., 4:5])
        box_class_probs = sigmoid_v(output_reshaped[..., 5:])

        # Per-cell (column, row) offsets with shape (grid_h, grid_w, 1, 2); the
        # singleton axis broadcasts over however many anchors the mask selects.
        col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
        grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

        box_xy += grid
        box_xy /= (grid_w, grid_h)
        box_wh /= self.input_wh
        # Move from the box centre to its top-left corner:
        box_xy -= (box_wh / 2.)
        boxes = np.concatenate((box_xy, box_wh), axis=-1)

        # boxes: top-left corner plus size, all relative to the image;
        # box_confidence: confidence level; box_class_probs: class confidence
        return boxes, box_confidence, box_class_probs

    def _filter_boxes(self, boxes, box_confidences, box_class_probs, conf_th):
        """Take in the unfiltered bounding box descriptors and discard each cell
        whose best class score is lower than the given confidence threshold.

        Keyword arguments:
        boxes -- bounding box coordinates with shape (height,width,3,4); 4 for
        x,y,width,height coordinates of the boxes
        box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as
        confidence scalar per element
        box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM)
        conf_th -- confidence threshold
        """
        box_scores = box_confidences * box_class_probs
        box_classes = np.argmax(box_scores, axis=-1)
        box_class_scores = np.max(box_scores, axis=-1)
        pos = np.where(box_class_scores >= conf_th)

        return boxes[pos], box_classes[pos], box_class_scores[pos]

    def _nms_boxes(self, boxes, box_confidences):
        """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their
        confidence scores and return an array with the indexes of the bounding boxes we want to
        keep (and display later).

        Keyword arguments:
        boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
        with shape (N,4); 4 for x,y,width,height coordinates of the boxes
        box_confidences -- a Numpy array containing the corresponding confidences with shape N
        """
        x_coord = boxes[:, 0]
        y_coord = boxes[:, 1]
        width = boxes[:, 2]
        height = boxes[:, 3]

        areas = width * height
        # Candidate indexes, highest confidence first:
        ordered = box_confidences.argsort()[::-1]

        keep = list()
        while ordered.size > 0:
            # Index of the current element:
            i = ordered[0]
            rest = ordered[1:]
            keep.append(i)

            xx1 = np.maximum(x_coord[i], x_coord[rest])
            yy1 = np.maximum(y_coord[i], y_coord[rest])
            xx2 = np.minimum(x_coord[i] + width[i], x_coord[rest] + width[rest])
            yy2 = np.minimum(y_coord[i] + height[i], y_coord[rest] + height[rest])

            width1 = np.maximum(0.0, xx2 - xx1 + 1)
            height1 = np.maximum(0.0, yy2 - yy1 + 1)
            intersection = width1 * height1
            union = (areas[i] + areas[rest] - intersection)

            # Compute the Intersection over Union (IoU) score:
            iou = intersection / union

            # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box
            # candidates to a minimum. In this step, we keep only those elements whose overlap
            # with the current bounding box is lower than the threshold:
            indexes = np.where(iou <= self.nms_threshold)[0]
            # ``indexes`` is relative to ``rest``, hence the + 1
            ordered = ordered[indexes + 1]

        # An explicit index dtype keeps the result usable for fancy indexing
        # even when it is empty.
        return np.array(keep, dtype=np.intp)
|
||||
|
||||
class PostprocessYOLOWrapper(object):
    """Hold the settings needed to run YOLO post-processing and build the
    matching ``PostprocessYOLO`` instance, exposed as ``self.postprocessor``.

    Reference from here https://github.com/jkjung-avt/tensorrt_demos/blob/3fb15c908b155d5edc1bf098c6b8c31886cd8e8d/utils/yolo.py
    """

    def __init__(self, model, input_shape, category_num=80):
        """Store the configuration and create the post-processor.

        Keyword arguments:
        model -- model name; a name containing 'tiny' selects the Tiny YOLO setup
        input_shape -- network input resolution as a tuple (H, W)
        category_num -- number of output categories/classes
        """
        self.model = model
        self.input_shape = input_shape
        self.category_num = category_num
        self.postprocessor = None
        self._init_yolov3_postprocessor()

    def _init_yolov3_postprocessor(self):
        """Fill in ``self.output_shapes`` and ``self.postprocessor`` for the model."""
        height, width = self.input_shape
        channels = (self.category_num + 5) * 3
        is_tiny = 'tiny' in self.model

        if is_tiny:
            # Tiny YOLO has two output scales.
            reductions = (32, 16)
            # A list of 2 three-dimensional tuples for the Tiny YOLO masks
            masks = [(3, 4, 5), (0, 1, 2)]
            # A list of 6 two-dimensional tuples for the Tiny YOLO anchors
            anchors = [(10, 14), (23, 27), (37, 58),
                       (81, 82), (135, 169), (344, 319)]
        else:
            # The full model has three output scales.
            reductions = (32, 16, 8)
            # A list of 3 three-dimensional tuples for the YOLO masks
            masks = [(6, 7, 8), (3, 4, 5), (0, 1, 2)]
            # A list of 9 two-dimensional tuples for the YOLO anchors
            anchors = [(10, 13), (16, 30), (33, 23),
                       (30, 61), (62, 45), (59, 119),
                       (116, 90), (156, 198), (373, 326)]

        self.output_shapes = [(1, channels, height // reduction, width // reduction)
                              for reduction in reductions]

        self.postprocessor = PostprocessYOLO(
            yolo_masks=masks,
            yolo_anchors=anchors,
            # Threshold for non-max suppression algorithm, float
            # value between 0 and 1
            nms_threshold=0.5,
            yolo_input_resolution=self.input_shape,
            category_num=self.category_num)
|
||||
|
|
@ -65,8 +65,68 @@ def yolov3_preprocess_func(images_folder, height, width, start_index=0, size_lim
|
|||
batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
|
||||
return batch_data, batch_filenames, image_size_list
|
||||
|
||||
def yolov3_variant_preprocess_func(images_folder, height, width, start_index=0, size_limit=0):
    '''
    Loads a batch of images and preprocess them
    parameter images_folder: path to folder storing images
    parameter height: image height in pixels
    parameter width: image width in pixels
    parameter start_index: index, within the folder listing, of the first image to load
    parameter size_limit: number of images to load. Default is 0 which means all images
                          from start_index on are picked.
    return: the preprocessed images stacked into one array, the list of file names that
            were loaded, and the list of original (height, width) image sizes.
            Three empty arrays are returned when start_index is past the last image.
    '''

    # reference from here:
    # https://github.com/jkjung-avt/tensorrt_demos/blob/3fb15c908b155d5edc1bf098c6b8c31886cd8e8d/utils/yolo.py#L60
    def _preprocess_yolo(img, input_shape):
        """Preprocess an image before TRT YOLO inferencing.
        # Args
            img: int8 numpy array of shape (img_h, img_w, 3)
            input_shape: a tuple of (H, W)
        # Returns
            preprocessed img: float32 numpy array of shape (3, H, W)
        """
        img = cv2.resize(img, (input_shape[1], input_shape[0]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.transpose((2, 0, 1)).astype(np.float32)
        img /= 255.0
        return img

    image_names = os.listdir(images_folder)
    if start_index >= len(image_names):
        return np.asanyarray([]), np.asanyarray([]), np.asanyarray([])

    # Always honour start_index. Previously it was ignored whenever size_limit
    # was 0 or larger than the number of images, so the whole folder was loaded.
    # Slicing clamps the end of the range to the listing length.
    end_index = start_index + size_limit if size_limit > 0 else len(image_names)
    batch_filenames = image_names[start_index:end_index]

    unconcatenated_batch_data = []
    image_size_list = []

    print(batch_filenames)
    print("size: %s" % str(len(batch_filenames)))

    model_image_size = (height, width)
    for image_name in batch_filenames:
        image_filepath = os.path.join(images_folder, image_name)

        # NOTE(review): cv2.imread returns None for files it cannot decode,
        # which makes the resize below fail -- confirm the folder holds only images.
        img = cv2.imread(image_filepath)
        image_data = _preprocess_yolo(img, model_image_size)
        image_data = np.ascontiguousarray(image_data)
        image_data = np.expand_dims(image_data, 0)
        unconcatenated_batch_data.append(image_data)
        image_size_list.append(img.shape[0:2])  # img.shape is h, w, c

    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    return batch_data, batch_filenames, image_size_list
|
||||
|
||||
|
||||
# This is for special tuned yolov3 model
|
||||
def yolov3_variant_2_preprocess_func(images_folder, height, width, start_index=0, size_limit=0):
|
||||
def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
|
||||
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
|
||||
shape = img.shape[:2] # current shape [height, width]
|
||||
|
|
|
|||
|
|
@ -77,10 +77,12 @@ class ONNXCalibrater:
|
|||
value_infos = {vi.name: vi for vi in model.graph.value_info}
|
||||
value_infos.update({ot.name: ot for ot in model.graph.output})
|
||||
value_infos.update({it.name: it for it in model.graph.input})
|
||||
initializer = set(init.name for init in model.graph.initializer)
|
||||
|
||||
added_nodes = []
|
||||
added_outputs = []
|
||||
tensors_to_calibrate = set()
|
||||
tensor_type_to_calibrate = set([TensorProto.FLOAT, TensorProto.FLOAT16])
|
||||
|
||||
for node in model.graph.node:
|
||||
should_be_calibrate = ((node.op_type in self.calibrate_op_types) and
|
||||
|
|
@ -89,8 +91,7 @@ class ONNXCalibrater:
|
|||
for tensor_name in itertools.chain(node.input, node.output):
|
||||
if tensor_name in value_infos.keys():
|
||||
vi = value_infos[tensor_name]
|
||||
if vi.type.HasField('tensor_type') and vi.type.tensor_type.elem_type == TensorProto.FLOAT and (
|
||||
tensor_name not in model.graph.initializer):
|
||||
if vi.type.HasField('tensor_type') and (vi.type.tensor_type.elem_type in tensor_type_to_calibrate) and (tensor_name not in initializer):
|
||||
tensors_to_calibrate.add(tensor_name)
|
||||
|
||||
# If augmenting all ops, it's possible that some nodes' input value are 0.
|
||||
|
|
@ -332,10 +333,10 @@ def calculate_calibration_data(model,
|
|||
augmented_model_path=augmented_model_path)
|
||||
|
||||
if not os.path.exists(augmented_model_path):
|
||||
augmented_model = calibrator.augment_graph(augment_all_ops=True)
|
||||
augmented_model = calibrator.augment_graph()
|
||||
onnx.save(augmented_model, augmented_model_path)
|
||||
|
||||
calibrator.get_intermediate_outputs(providers=["CUDAExecutionProvider"])
|
||||
calibrator.get_intermediate_outputs(providers=["CUDAExecutionProvider"], ort_graph_optimization_enable=False)
|
||||
|
||||
|
||||
def generate_calibration_table(calibrator,
|
||||
|
|
|
|||
Loading…
Reference in a new issue