YOLOv5 OpenVINO Quantization Deployment

First, download the YOLOv5 source code and install the Python dependencies for YOLOv5 and OpenVINO.

git clone https://github.com/ultralytics/yolov5.git
cd yolov5
pip install -r requirements.txt && pip install openvino openvino-dev
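
To confirm the OpenVINO packages installed correctly, printing the runtime version is a quick check (my addition, not required by the workflow):

python -c "from openvino.runtime import get_version; print(get_version())"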

Then, use the export.py script provided by YOLOv5 to convert the pretrained PyTorch model to an OpenVINO FP32 IR model.

python export.py --weights yolov5n.pt --imgsz 640 --batch-size 1 --include openvino
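
Export writes the IR into the yolov5n_openvino_model folder by default. As a quick sanity check (my addition, not part of the original workflow), the IR can be read back with the OpenVINO runtime:

from openvino.runtime import Core

core = Core()
model = core.read_model("./yolov5n_openvino_model/yolov5n.xml")
print("inputs:", model.inputs)
print("outputs:", model.outputs)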

The quantization code below is adapted from: https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-yolov5-quantization-migration

from pathlib import Path

from utils.dataloaders import create_dataloader
from utils.general import check_dataset
from export import attempt_load, yaml_save
from val import run as validation_fn
from openvino.tools import mo
from openvino.runtime import serialize
from openvino.tools.pot.api import DataLoader
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.pipeline.initializer import create_pipeline
from openvino.tools.pot.graph.model_utils import compress_model_weights

IMAGE_SIZE = 640
MODEL_NAME = "yolov5n"
DATASET_CONFIG = "./data/coco128.yaml"


class YOLOv5POTDataLoader(DataLoader):
    """Inherit from DataLoader function and implement for YOLOv5."""

    def __init__(self, data_source):
        super().__init__({})
        self._data_loader = data_source
        self._data_iter = iter(self._data_loader)

    def __len__(self):
        return len(self._data_loader.dataset)

    def __getitem__(self, item):
        try:
            batch_data = next(self._data_iter)
        except StopIteration:
            self._data_iter = iter(self._data_loader)
            batch_data = next(self._data_iter)
        im, target, path, shape = batch_data
        im = im.float()
        im /= 255
        nb, _, height, width = im.shape
        img = im.cpu().detach().numpy()
        target = target.cpu().detach().numpy()
        annotation = dict()
        annotation["image_path"] = path
        annotation["target"] = target
        annotation["batch_size"] = nb
        annotation["shape"] = shape
        annotation["width"] = width
        annotation["height"] = height
        annotation["img"] = img
        return (item, annotation), img


if __name__ == "__main__":
    # Conversion of the YOLOv5 model to OpenVINO
    onnx_path = f"./{MODEL_NAME}.onnx"

    # fp32 IR model
    fp32_path = f"./FP32_openvino_model/{MODEL_NAME}_fp32.xml"
    print(f"Export ONNX to OpenVINO FP32 IR to: {fp32_path}")
    model = mo.convert_model(onnx_path)
    serialize(model, fp32_path)

    # fp16 IR model
    fp16_path = f"./FP16_openvino_model/{MODEL_NAME}_fp16.xml"
    print(f"Export ONNX to OpenVINO FP16 IR to: {fp16_path}")
    model = mo.convert_model(onnx_path, compress_to_fp16=True)
    serialize(model, fp16_path)

    # Prepare dataset for quantization
    data = check_dataset(DATASET_CONFIG)
    data_source = create_dataloader(data["val"], imgsz=640, batch_size=1, stride=32, pad=0.5, workers=0)[0]
    pot_data_loader = YOLOv5POTDataLoader(data_source)

    # Configure quantization pipeline
    algorithms_config = [
        {
            "name": "DefaultQuantization",
            "params": {"preset": "mixed", "stat_subset_size": 300, "target_device": "CPU"},
        }
    ]
    engine_config = {"device": "CPU"}
    model_config = {
        "model_name": f"{MODEL_NAME}",
        "model": fp32_path,
        "weights": fp32_path.replace(".xml", ".bin"),
    }
    pot_model = load_model(model_config)
    engine = IEEngine(config=engine_config, data_loader=pot_data_loader)
    pipeline = create_pipeline(algorithms_config, engine)

    # Perform model optimization
    compressed_model = pipeline.run(pot_model)
    compress_model_weights(compressed_model)
    optimized_save_dir = Path("./POT_INT8_openvino_model/")
    save_model(compressed_model, optimized_save_dir, model_config["model_name"] + "_int8")
    pot_int8_path = f"{optimized_save_dir}/{MODEL_NAME}_int8.xml"

    # Compare accuracy of the FP32 and INT8 models
    model = attempt_load(f"./{MODEL_NAME}.pt", device="cpu", inplace=True, fuse=True)
    metadata = {"stride": int(max(model.stride)), "names": model.names}  # model metadata
    yaml_save(Path(pot_int8_path).with_suffix(".yaml"), metadata)
    yaml_save(Path(fp32_path).with_suffix(".yaml"), metadata)

    print("Checking the accuracy of the original model:")
    fp32_metrics = validation_fn(
        data=DATASET_CONFIG,
        weights=Path(fp32_path).parent,
        batch_size=1,
        workers=0,
        plots=False,
        device="cpu",
        iou_thres=0.65,
    )
    fp32_ap5 = fp32_metrics[0][2]
    fp32_ap_full = fp32_metrics[0][3]
    print(f"mAP@.5 = {fp32_ap5}")
    print(f"mAP@.5:.95 = {fp32_ap_full}")

    print("Checking the accuracy of the POT int8 model:")
    int8_metrics = validation_fn(
        data=DATASET_CONFIG,
        weights=Path(pot_int8_path).parent,
        batch_size=1,
        workers=0,
        plots=False,
        device="cpu",
        iou_thres=0.65,
    )
    pot_int8_ap5 = int8_metrics[0][2]
    pot_int8_ap_full = int8_metrics[0][3]
    print(f"mAP@.5 = {pot_int8_ap5}")
    print(f"mAP@.5:.95 = {pot_int8_ap_full}")
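
Note that the POT API used above is deprecated in recent OpenVINO releases in favor of NNCF; the migration notebook linked above covers exactly this transition. A minimal sketch of the NNCF equivalent (my adaptation, reusing the data_source dataloader and file paths from the script above):

import nncf
from openvino.runtime import Core, serialize

def transform_fn(data_item):
    # reduce each (im, targets, paths, shapes) batch to a normalized float image
    images = data_item[0]
    return images.float().numpy() / 255

core = Core()
ov_model = core.read_model("./FP32_openvino_model/yolov5n_fp32.xml")
calibration_dataset = nncf.Dataset(data_source, transform_fn)
quantized_model = nncf.quantize(ov_model, calibration_dataset,
                                preset=nncf.QuantizationPreset.MIXED, subset_size=300)
serialize(quantized_model, "./NNCF_INT8_openvino_model/yolov5n_int8.xml")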

Python inference:

import cv2
import numpy as np
from openvino.inference_engine import IECore

names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
         'teddy bear', 'hair drier', 'toothbrush']
conf_thres = 0.5
nms_thres = 0.5
model_path = "yolov5n-f32.onnx"  # ONNX inference supports fp32 and fp16
model_xml = r"./POT_INT8_openvino_model/yolov5n_int8.xml"  # IR inference supports fp32, fp16 and int8
model_bin = r"./POT_INT8_openvino_model/yolov5n_int8.bin"


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), scaleup=False, stride=32):
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)
    ratio = r  # width, height ratio
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    dw /= 2
    dh /= 2
    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def iou(b1, b2):
    b1_x1, b1_y1, b1_x2, b1_y2 = b1[0], b1[1], b1[2], b1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = b2[:, 0], b2[:, 1], b2[:, 2], b2[:, 3]
    inter_rect_x1 = np.maximum(b1_x1, b2_x1)
    inter_rect_y1 = np.maximum(b1_y1, b2_y1)
    inter_rect_x2 = np.minimum(b1_x2, b2_x2)
    inter_rect_y2 = np.minimum(b1_y2, b2_y2)
    inter_area = np.maximum(inter_rect_x2 - inter_rect_x1, 0) * np.maximum(inter_rect_y2 - inter_rect_y1, 0)
    area_b1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    area_b2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    return inter_area / np.maximum(area_b1 + area_b2 - inter_area, 1e-6)


def non_max_suppression(boxes, conf_thres=0.5, nms_thres=0.4, ratio=1, pad=(20, 20)):
    bs = np.shape(boxes)[0]  # batch size
    # xywh -> xyxy
    shape_boxes = np.zeros_like(boxes[:, :, :4])
    shape_boxes[:, :, 0] = boxes[:, :, 0] - boxes[:, :, 2] / 2
    shape_boxes[:, :, 1] = boxes[:, :, 1] - boxes[:, :, 3] / 2
    shape_boxes[:, :, 2] = boxes[:, :, 0] + boxes[:, :, 2] / 2
    shape_boxes[:, :, 3] = boxes[:, :, 1] + boxes[:, :, 3] / 2
    boxes[:, :, :4] = shape_boxes
    boxes[:, :, 5:] *= boxes[:, :, 4:5]
    # output holds the predictions per image; at inference time there is usually just one
    output = []
    for i in range(bs):
        predictions = boxes[i]  # predicted boxes in xyxy, shape == (num_boxes, 85)
        score = np.max(predictions[:, 5:], axis=-1)
        # score = predictions[:, 4]  # objectness confidence
        mask = score > conf_thres  # confidence threshold; True rows are kept, False rows dropped
        detections = predictions[mask]  # first filtering pass
        class_conf = np.expand_dims(np.max(detections[:, 5:], axis=-1), axis=-1)  # class confidence of each box
        class_pred = np.expand_dims(np.argmax(detections[:, 5:], axis=-1), axis=-1)  # class index of each box
        # stack results: (num_boxes, 4 box coords + 1 class conf + 1 class id)
        detections = np.concatenate([detections[:, :4], class_conf, class_pred], axis=-1)
        unique_class = np.unique(detections[:, -1])  # all classes present
        if len(unique_class) == 0:
            continue
        best_box = []
        for c in unique_class:
            # take the detections of class c
            cls_mask = detections[:, -1] == c
            detection = detections[cls_mask]
            # sort by class confidence, high to low
            scores = detection[:, 4]
            arg_sort = np.argsort(scores)[::-1]  # argsort returns indices
            detection = detection[arg_sort]
            while len(detection) != 0:
                best_box.append(detection[0])
                if len(detection) == 1:
                    break
                # IoU between the current best box and the remaining boxes
                ious = iou(best_box[-1], detection[1:])
                detection = detection[1:][ious < nms_thres]  # keep boxes below nms_thres; each round removes at least one
        output.append(best_box)
    boxes_loc = []
    conf_loc = []
    class_loc = []
    if len(output):
        for i in range(len(output)):
            pred = output[i]
            for det in pred:
                if len(det):
                    # map box coordinates back to the original image
                    det[0] = (det[0] - pad[0]) / ratio
                    det[2] = (det[2] - pad[0]) / ratio
                    det[1] = (det[1] - pad[1]) / ratio
                    det[3] = (det[3] - pad[1]) / ratio
                    boxes_loc.append([det[0], det[1], det[2], det[3]])
                    conf_loc.append(det[4])
                    class_loc.append(det[5])
    return boxes_loc, conf_loc, class_loc


def plot_box(img, boxes, conf, clas_id, line_thickness=3, names=None):
    # draw the bounding box
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    c1, c2 = (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3]))
    cv2.rectangle(img, c1, c2, [0, 0, 255], thickness=tl, lineType=cv2.LINE_AA)
    # draw the label box
    label = f'{names[int(clas_id)]} {conf:.2f}'
    tf = max(tl - 1, 1)  # label font thickness
    t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
    c2 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
    cv2.rectangle(img, c1, c2, [255, 0, 0], -1, cv2.LINE_AA)
    cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


if __name__ == '__main__':
    ie = IECore()
    # net = ie.read_network(model=model_path)
    net = ie.read_network(model=model_xml, weights=model_bin)
    exec_net = ie.load_network(network=net, device_name="CPU")
    input_layer = next(iter(net.input_info))
    frame = cv2.imread("bus.jpg")
    img, ratio, (dw, dh) = letterbox(frame)
    blob = cv2.dnn.blobFromImage(np.ascontiguousarray(img), 1 / 255.0, (img.shape[0], img.shape[1]), swapRB=True, crop=False)
    infer_request_handle = exec_net.start_async(request_id=0, inputs={input_layer: blob})
    if infer_request_handle.wait(-1) == 0:
        res = infer_request_handle.output_blobs["output0"]
        outs = res.buffer
        boxes_loc, conf_loc, class_loc = non_max_suppression(outs, conf_thres=conf_thres, nms_thres=nms_thres, ratio=ratio, pad=(dw, dh))
        for i in range(len(boxes_loc)):
            boxes = boxes_loc[i]
            conf = conf_loc[i]
            clas_id = class_loc[i]
            plot_box(frame, boxes, conf, clas_id, line_thickness=3, names=names)
        cv2.imshow("result", frame)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
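
The script above uses the legacy openvino.inference_engine (IECore) API, which newer OpenVINO releases have replaced. A minimal sketch of the same synchronous inference on the 2.0 API (my adaptation; model_xml, blob, and the helper functions are the ones defined above):

import openvino.runtime as ov

core = ov.Core()
compiled = core.compile_model(model_xml, "CPU")
outs = compiled([blob])[compiled.output(0)]  # same array as output_blobs["output0"].buffer
boxes_loc, conf_loc, class_loc = non_max_suppression(outs, conf_thres=conf_thres, nms_thres=nms_thres, ratio=ratio, pad=(dw, dh))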

Alternatively, use the detect.py that ships with YOLOv5:

python detect.py --weights ./POT_INT8_openvino_model
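
This works because the quantization script saved the stride/names metadata YAML next to the int8 IR (the yaml_save calls above); detect.py reads that sidecar file to recover the stride and class names.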

C++ inference (based on the article "Deploying YOLOv5 Models with the OpenVINO C++ API"):

#include <iostream>
#include <string>
#include <ctime>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>

/* ---------  Please modify the path of yolov5 model and image  --------- */
// Can also be yolov5n_fp16.xml, yolov5n_fp32.xml, or yolov5n-f32.onnx
// (fp32/fp16/int8 IR inference and fp32 ONNX inference are supported)
std::string model_file = "yolov5n_int8.xml";
std::string image_file = "bus.jpg";

const std::vector<std::string> class_names = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush" };

cv::Mat letterbox(cv::Mat& img, std::vector<float>& paddings, std::vector<int> new_shape = { 640, 640 })
{
    // Get current image shape [height, width]
    int img_h = img.rows;
    int img_w = img.cols;

    // Compute scale ratio (new / old) and target resized shape
    float scale = std::min(new_shape[1] * 1.0 / img_h, new_shape[0] * 1.0 / img_w);
    int resize_h = int(round(img_h * scale));
    int resize_w = int(round(img_w * scale));
    paddings[0] = scale;

    // Compute padding
    int pad_h = new_shape[1] - resize_h;
    int pad_w = new_shape[0] - resize_w;

    // Resize and pad image while meeting stride-multiple constraints
    cv::Mat resized_img;
    cv::resize(img, resized_img, cv::Size(resize_w, resize_h));

    // Divide padding into 2 sides
    float half_h = pad_h * 1.0 / 2;
    float half_w = pad_w * 1.0 / 2;
    paddings[1] = half_h;
    paddings[2] = half_w;

    // Compute padding border
    int top = int(round(half_h - 0.1));
    int bottom = int(round(half_h + 0.1));
    int left = int(round(half_w - 0.1));
    int right = int(round(half_w + 0.1));

    // Add border
    cv::copyMakeBorder(resized_img, resized_img, top, bottom, left, right, 0, cv::Scalar(114, 114, 114));
    return resized_img;
}

int main(int argc, char* argv[])
{
    // -------- Get OpenVINO runtime version --------
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;

    // -------- Step 1. Initialize OpenVINO Runtime Core --------
    ov::Core core;

    // -------- Step 2. Compile the Model --------
    auto compiled_model = core.compile_model(model_file, "CPU");

    // -------- Step 3. Create an Inference Request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();
    clock_t start = clock();

    // -------- Step 4. Read a picture file and do the preprocess --------
    cv::Mat img = cv::imread(image_file);           // Load a picture into memory
    std::vector<float> paddings(3);                 // scale, half_h, half_w
    cv::Mat resized_img = letterbox(img, paddings); // Resize to (640,640) by letterbox
    // BGR->RGB, u8(0-255)->f32(0.0-1.0), HWC->NCHW
    cv::Mat blob = cv::dnn::blobFromImage(resized_img, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true);

    // -------- Step 5. Feed the blob into the input node of YOLOv5 --------
    auto input_port = compiled_model.input(); // Get input port for model with one input
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0)); // Create tensor from external memory
    infer_request.set_input_tensor(input_tensor); // Set input tensor for model with one input

    // -------- Step 6. Start inference --------
    for (size_t i = 0; i < 100; i++)
        infer_request.infer();

    // -------- Step 7. Get the inference result --------
    auto output = infer_request.get_output_tensor(0);
    auto output_shape = output.get_shape();
    std::cout << "The shape of output tensor:" << output_shape << std::endl;
    cv::Mat output_buffer(output_shape[1], output_shape[2], CV_32F, output.data());

    // -------- Step 8. Post-process the inference result --------
    float conf_threshold = 0.25;
    float nms_threshold = 0.5;
    std::vector<cv::Rect> boxes;
    std::vector<int> class_ids;
    std::vector<float> class_scores;
    std::vector<float> confidences;
    for (int i = 0; i < output_buffer.rows; i++) {
        float confidence = output_buffer.at<float>(i, 4);
        if (confidence < conf_threshold) continue;
        cv::Mat classes_scores = output_buffer.row(i).colRange(5, 85);
        cv::Point class_id;
        double score;
        cv::minMaxLoc(classes_scores, NULL, &score, NULL, &class_id);
        if (score > 0.25) {
            float cx = output_buffer.at<float>(i, 0);
            float cy = output_buffer.at<float>(i, 1);
            float w = output_buffer.at<float>(i, 2);
            float h = output_buffer.at<float>(i, 3);
            // Undo letterbox padding and scaling
            int left = static_cast<int>((cx - 0.5 * w - paddings[2]) / paddings[0]);
            int top = static_cast<int>((cy - 0.5 * h - paddings[1]) / paddings[0]);
            int width = static_cast<int>(w / paddings[0]);
            int height = static_cast<int>(h / paddings[0]);
            cv::Rect box;
            box.x = left;
            box.y = top;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            class_ids.push_back(class_id.x);
            class_scores.push_back(score);
            confidences.push_back(confidence);
        }
    }
    // NMS
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
    clock_t end = clock();
    std::cout << end - start << std::endl;

    // -------- Step 9. Visualize the detection results --------
    for (size_t i = 0; i < indices.size(); i++) {
        int index = indices[i];
        int class_id = class_ids[index];
        cv::rectangle(img, boxes[index], cv::Scalar(0, 0, 255), 2, 8);
        std::string label = class_names[class_id] + ":" + std::to_string(class_scores[index]);
        cv::putText(img, label, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(255, 0, 0));
    }
    cv::imshow("YOLOv5 OpenVINO Inference C++ Demo", img);
    cv::waitKey(0);
    return 0;
}
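
To build the demo, link against the OpenVINO runtime and OpenCV. With CMake that is find_package(OpenVINO REQUIRED) plus find_package(OpenCV REQUIRED), then target_link_libraries(demo PRIVATE openvino::runtime ${OpenCV_LIBS}); this is the documented CMake integration, though exact paths depend on your install.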

Running the C++ demo on an i7-12700 CPU, the 100-iteration inference loop took the following times for the fp32, fp16, and int8 models (3 runs each):
yolov5n_fp32: 1599ms 2040ms 1514ms
yolov5n_fp16: 1505ms 2078ms 1514ms
yolov5n_int8: 856ms 861ms 852ms

The fp32 and fp16 models take about the same time; int8 roughly halves the inference time.
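
For completeness, a minimal Python sketch of how such a comparison can be reproduced with the OpenVINO runtime (my approximation of the C++ loop above; the paths assume the IR files generated earlier, and a random input stands in for a real image):

import time
import numpy as np
import openvino.runtime as ov

core = ov.Core()
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)  # stand-in input
for path in ["./FP32_openvino_model/yolov5n_fp32.xml",
             "./FP16_openvino_model/yolov5n_fp16.xml",
             "./POT_INT8_openvino_model/yolov5n_int8.xml"]:
    compiled = core.compile_model(path, "CPU")
    compiled([dummy])  # warm-up run, excluded from timing
    t0 = time.perf_counter()
    for _ in range(100):
        compiled([dummy])
    print(f"{path}: {(time.perf_counter() - t0) * 1000:.0f} ms / 100 runs")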

YOLOv5 TensorRT Quantization Deployment

Method 1: the hardcoded-parser approach, converting wts to trt
This relies on the excellent project https://github.com/wang-xinyu/tensorrtx/tree/master/yolov5, which provides both Python and C++ inference code. The project is impressively thorough; with such a good wheel available, why not use it directly?
Setting up the environment also drew on the blog post "windows上配置TensorRT yolov5-6.0部署 tensorrtx视频流推理" (configuring TensorRT YOLOv5-6.0 deployment on Windows with tensorrtx video-stream inference).
I measured the single-image inference time of the various YOLOv5 models as follows (C++, RTX 3070 GPU):
yolov5n-int8: 2ms 1ms
yolov5s-int8: 2ms 1ms
yolov5m-int8: 3ms 2ms
yolov5l-int8: 4ms 3ms
yolov5x-int8: 7ms 6ms

yolov5n-fp16: 1ms 1ms
yolov5s-fp16: 2ms 2ms
yolov5m-fp16: 4ms 3ms
yolov5l-fp16: 6ms 5ms
yolov5x-fp16: 10ms 9ms

yolov5n-fp32: 424ms 2ms
yolov5s-fp32: 389ms 4ms
yolov5m-fp32: 401ms 9ms
yolov5l-fp32: 422ms 17ms
yolov5x-fp32: 30ms 28ms
On my machine the first inference of the fp32 yolov5n through yolov5l models took far longer than subsequent ones. I am not sure why, though this pattern usually points to one-off startup cost (such as CUDA context initialization) rather than steady-state speed.

Method 2: the API approach, parsing ONNX into TensorRT
The code follows the course 《深度学习-TensorRT模型部署实战》 (Deep Learning: TensorRT Model Deployment in Practice): first convert the ONNX model into a TensorRT engine, then run inference with it.
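
As an aside, the same ONNX-to-engine conversion can also be done with the trtexec tool bundled with TensorRT (a one-line sketch; add --fp16 to build a half-precision engine):

trtexec --onnx=yolov5s-7.0.onnx --saveEngine=yolov5s-7.0.trt

The full C++ route, building the engine through the parser API and then running inference, is shown below: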

#include <NvInfer.h>
#include <NvInferRuntime.h>
#include "onnx-tensorrt-release-8.0/NvOnnxParser.h"
#include <cuda_runtime.h>
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <memory>
#include <functional>
#include <algorithm>
#include <opencv2/opencv.hpp>

using namespace std;

#define checkRuntime(op)  __check_cuda_runtime((op), #op, __FILE__, __LINE__)

bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line) {
    if (code != cudaSuccess) {
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}

inline const char* severity_string(nvinfer1::ILogger::Severity t) {
    switch (t) {
        case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error";
        case nvinfer1::ILogger::Severity::kERROR:   return "error";
        case nvinfer1::ILogger::Severity::kWARNING: return "warning";
        case nvinfer1::ILogger::Severity::kINFO:    return "info";
        case nvinfer1::ILogger::Severity::kVERBOSE: return "verbose";
        default: return "unknow";
    }
}

static const char* cocolabels[] = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
    "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
    "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
    "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
    "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush"
};

static std::tuple<uint8_t, uint8_t, uint8_t> hsv2bgr(float h, float s, float v) {
    const int h_i = static_cast<int>(h * 6);
    const float f = h * 6 - h_i;
    const float p = v * (1 - s);
    const float q = v * (1 - f * s);
    const float t = v * (1 - (1 - f) * s);
    float r, g, b;
    switch (h_i) {
        case 0: r = v; g = t; b = p; break;
        case 1: r = q; g = v; b = p; break;
        case 2: r = p; g = v; b = t; break;
        case 3: r = p; g = q; b = v; break;
        case 4: r = t; g = p; b = v; break;
        case 5: r = v; g = p; b = q; break;
        default: r = 1; g = 1; b = 1; break;
    }
    return make_tuple(static_cast<uint8_t>(b * 255), static_cast<uint8_t>(g * 255), static_cast<uint8_t>(r * 255));
}

static std::tuple<uint8_t, uint8_t, uint8_t> random_color(int id) {
    float h_plane = ((((unsigned int)id << 2) ^ 0x937151) % 100) / 100.0f;
    float s_plane = ((((unsigned int)id << 3) ^ 0x315793) % 100) / 100.0f;
    return hsv2bgr(h_plane, s_plane, 1);
}

class TRTLogger : public nvinfer1::ILogger {
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override {
        if (severity <= Severity::kWARNING) {
            if (severity == Severity::kWARNING) {
                printf("\033[33m%s: %s\033[0m\n", severity_string(severity), msg);
            } else if (severity <= Severity::kERROR) {
                printf("\033[31m%s: %s\033[0m\n", severity_string(severity), msg);
            } else {
                printf("%s: %s\n", severity_string(severity), msg);
            }
        }
    }
} logger;

template<typename _T>
shared_ptr<_T> make_nvshared(_T* ptr) {
    return shared_ptr<_T>(ptr, [](_T* p) { p->destroy(); });
}

bool build_model() {
    TRTLogger logger;

    // The basic components needed to build an engine
    auto builder = make_nvshared(nvinfer1::createInferBuilder(logger));
    auto config = make_nvshared(builder->createBuilderConfig());
    auto network = make_nvshared(builder->createNetworkV2(1));

    // The onnx parser fills the network from the ONNX file,
    // much like adding layers by hand with addConv etc.
    auto parser = make_nvshared(nvonnxparser::createParser(*network, logger));
    if (!parser->parseFromFile("yolov5s-7.0.onnx", 1)) {
        printf("Failed to parse yolov5s-7.0.onnx\n");
        // Note: the objects created above are not released on this path (a leak);
        // a cleaner fix is left for later
        return false;
    }

    int maxBatchSize = 1;
    printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
    config->setMaxWorkspaceSize(1 << 28);

    // If the model has multiple inputs, multiple profiles are required
    auto profile = builder->createOptimizationProfile();
    auto input_tensor = network->getInput(0);
    auto input_dims = input_tensor->getDimensions();

    // Configure the minimum, optimal, and maximum shape ranges
    input_dims.d[0] = 1;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
    input_dims.d[0] = maxBatchSize;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
    config->addOptimizationProfile(profile);

    auto engine = make_nvshared(builder->buildEngineWithConfig(*network, *config));
    if (engine == nullptr) {
        printf("Build engine failed.\n");
        return false;
    }

    // Serialize the model and save it to a file
    auto model_data = make_nvshared(engine->serialize());
    FILE* f = fopen("yolov5s-7.0.trt", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);

    // Objects are destroyed in reverse order of construction
    printf("Build Done.\n");
    return true;
}

vector<unsigned char> load_file(const string& file) {
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};
    in.seekg(0, ios::end);
    size_t length = in.tellg();
    std::vector<uint8_t> data;
    if (length > 0) {
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}

void inference() {
    TRTLogger logger;
    auto engine_data = load_file("yolov5s-7.0.trt");
    auto runtime = make_nvshared(nvinfer1::createInferRuntime(logger));
    auto engine = make_nvshared(runtime->deserializeCudaEngine(engine_data.data(), engine_data.size()));
    if (engine == nullptr) {
        printf("Deserialize cuda engine failed.\n");
        runtime->destroy();
        return;
    }
    if (engine->getNbBindings() != 2) {
        printf("Bad ONNX export: the model must have exactly 1 input and 1 output, but this one has %d outputs.\n", engine->getNbBindings() - 1);
        return;
    }

    cudaStream_t stream = nullptr;
    checkRuntime(cudaStreamCreate(&stream));
    auto execution_context = make_nvshared(engine->createExecutionContext());

    int input_batch = 1;
    int input_channel = 3;
    int input_height = 640;
    int input_width = 640;
    int input_numel = input_batch * input_channel * input_height * input_width;
    float* input_data_host = nullptr;
    float* input_data_device = nullptr;
    checkRuntime(cudaMallocHost(&input_data_host, input_numel * sizeof(float)));
    checkRuntime(cudaMalloc(&input_data_device, input_numel * sizeof(float)));

    auto image = cv::imread("bus.jpg");
    // Letterbox via an affine transform: scale and center the image on a 640x640 canvas
    float scale_x = input_width / (float)image.cols;
    float scale_y = input_height / (float)image.rows;
    float scale = std::min(scale_x, scale_y);
    float i2d[6], d2i[6];
    i2d[0] = scale; i2d[1] = 0; i2d[2] = (-scale * image.cols + input_width + scale - 1) * 0.5;
    i2d[3] = 0; i2d[4] = scale; i2d[5] = (-scale * image.rows + input_height + scale - 1) * 0.5;

    cv::Mat m2x3_i2d(2, 3, CV_32F, i2d);
    cv::Mat m2x3_d2i(2, 3, CV_32F, d2i);
    cv::invertAffineTransform(m2x3_i2d, m2x3_d2i);

    cv::Mat input_image(input_height, input_width, CV_8UC3);
    cv::warpAffine(image, input_image, m2x3_i2d, input_image.size(), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar::all(114));
    cv::imwrite("input-image.jpg", input_image);

    // HWC BGR u8 -> CHW RGB f32 in [0,1]
    int image_area = input_image.cols * input_image.rows;
    unsigned char* pimage = input_image.data;
    float* phost_b = input_data_host + image_area * 0;
    float* phost_g = input_data_host + image_area * 1;
    float* phost_r = input_data_host + image_area * 2;
    for (int i = 0; i < image_area; ++i, pimage += 3) {
        *phost_r++ = pimage[0] / 255.0f;
        *phost_g++ = pimage[1] / 255.0f;
        *phost_b++ = pimage[2] / 255.0f;
    }

    checkRuntime(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream));

    auto output_dims = engine->getBindingDimensions(1);
    int output_numbox = output_dims.d[1];
    int output_numprob = output_dims.d[2];
    int num_classes = output_numprob - 5;
    int output_numel = input_batch * output_numbox * output_numprob;
    float* output_data_host = nullptr;
    float* output_data_device = nullptr;
    checkRuntime(cudaMallocHost(&output_data_host, sizeof(float) * output_numel));
    checkRuntime(cudaMalloc(&output_data_device, sizeof(float) * output_numel));

    auto input_dims = engine->getBindingDimensions(0);
    input_dims.d[0] = input_batch;
    execution_context->setBindingDimensions(0, input_dims);
    float* bindings[] = { input_data_device, output_data_device };
    bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
    checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));
    checkRuntime(cudaStreamSynchronize(stream));

    // Decode boxes: filter by objectness * class probability
    vector<vector<float>> bboxes;
    float confidence_threshold = 0.25;
    float nms_threshold = 0.5;
    for (int i = 0; i < output_numbox; ++i) {
        float* ptr = output_data_host + i * output_numprob;
        float objness = ptr[4];
        if (objness < confidence_threshold)
            continue;
        float* pclass = ptr + 5;
        int label = std::max_element(pclass, pclass + num_classes) - pclass;
        float prob = pclass[label];
        float confidence = prob * objness;
        if (confidence < confidence_threshold)
            continue;
        float cx = ptr[0];
        float cy = ptr[1];
        float width = ptr[2];
        float height = ptr[3];
        float left = cx - width * 0.5;
        float top = cy - height * 0.5;
        float right = cx + width * 0.5;
        float bottom = cy + height * 0.5;
        // Map back to the original image with the inverse affine transform
        float image_base_left = d2i[0] * left + d2i[2];
        float image_base_right = d2i[0] * right + d2i[2];
        float image_base_top = d2i[0] * top + d2i[5];
        float image_base_bottom = d2i[0] * bottom + d2i[5];
        bboxes.push_back({ image_base_left, image_base_top, image_base_right, image_base_bottom, (float)label, confidence });
    }
    printf("decoded bboxes.size = %d\n", (int)bboxes.size());

    // NMS: sort by confidence, then suppress overlapping boxes of the same class
    std::sort(bboxes.begin(), bboxes.end(), [](vector<float>& a, vector<float>& b) { return a[5] > b[5]; });
    std::vector<bool> remove_flags(bboxes.size());
    std::vector<vector<float>> box_result;
    box_result.reserve(bboxes.size());
    auto iou = [](const vector<float>& a, const vector<float>& b) {
        float cross_left = std::max(a[0], b[0]);
        float cross_top = std::max(a[1], b[1]);
        float cross_right = std::min(a[2], b[2]);
        float cross_bottom = std::min(a[3], b[3]);
        float cross_area = std::max(0.0f, cross_right - cross_left) * std::max(0.0f, cross_bottom - cross_top);
        float union_area = std::max(0.0f, a[2] - a[0]) * std::max(0.0f, a[3] - a[1])
                         + std::max(0.0f, b[2] - b[0]) * std::max(0.0f, b[3] - b[1]) - cross_area;
        if (cross_area == 0 || union_area == 0) return 0.0f;
        return cross_area / union_area;
    };
    for (int i = 0; i < bboxes.size(); ++i) {
        if (remove_flags[i]) continue;
        auto& ibox = bboxes[i];
        box_result.emplace_back(ibox);
        for (int j = i + 1; j < bboxes.size(); ++j) {
            if (remove_flags[j]) continue;
            auto& jbox = bboxes[j];
            if (ibox[4] == jbox[4]) {
                if (iou(ibox, jbox) >= nms_threshold)
                    remove_flags[j] = true;
            }
        }
    }
    printf("box_result.size = %d\n", (int)box_result.size());

    // Draw the results
    for (int i = 0; i < box_result.size(); ++i) {
        auto& ibox = box_result[i];
        float left = ibox[0];
        float top = ibox[1];
        float right = ibox[2];
        float bottom = ibox[3];
        int class_label = ibox[4];
        float confidence = ibox[5];
        cv::Scalar color;
        tie(color[0], color[1], color[2]) = random_color(class_label);
        cv::rectangle(image, cv::Point(left, top), cv::Point(right, bottom), color, 3);
        auto name = cocolabels[class_label];
        auto caption = cv::format("%s %.2f", name, confidence);
        int text_width = cv::getTextSize(caption, 0, 1, 2, nullptr).width + 10;
        cv::rectangle(image, cv::Point(left - 3, top - 33), cv::Point(left + text_width, top), color, -1);
        cv::putText(image, caption, cv::Point(left, top - 5), 0, 1, cv::Scalar::all(0), 2, 16);
    }
    cv::imwrite("image-draw.jpg", image);

    checkRuntime(cudaStreamDestroy(stream));
    checkRuntime(cudaFreeHost(input_data_host));
    checkRuntime(cudaFreeHost(output_data_host));
    checkRuntime(cudaFree(input_data_device));
    checkRuntime(cudaFree(output_data_device));
}

int main() {
    if (!build_model()) {
        return -1;
    }
    inference();
    return 0;
}
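
Also worth noting: build_model() above produces an FP32 engine. A minimal sketch of the same build with FP16 enabled, using the TensorRT 8.x Python API (my adaptation; true INT8 quantization would additionally need config.int8_calibrator and calibration data):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)
with open("yolov5s-7.0.onnx", "rb") as f:
    assert parser.parse(f.read()), parser.get_error(0)
config = builder.create_builder_config()
config.max_workspace_size = 1 << 28
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)  # request half-precision kernels
engine_bytes = builder.build_serialized_network(network, config)
with open("yolov5s-7.0-fp16.trt", "wb") as f:
    f.write(engine_bytes)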

The build_model step (converting the ONNX model into a TensorRT engine) can also be replaced by the export.py that ships with yolov5-7.0:

python export.py --include 'engine'
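
If TensorRT is available in the Python environment, the exported .engine file can also be fed straight back into detect.py, which picks the backend from the weights suffix:

python detect.py --weights yolov5s.engine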
