14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理

1、数据输入预处理、模型输出nms等

import argparse
import math
import os
import time

import cv2
import numpy as np
import torch
import torchvision

from load_onnx_model import *
def make_divisible(x, divisor):
    """Return the smallest multiple of ``divisor`` that is >= ``x``."""
    return math.ceil(x / divisor) * divisor


def check_img_size(imgsz, s=32, floor=0):
    """Round an image size up so that every dimension is a multiple of ``s``.

    Args:
        imgsz: Either a single int (e.g. ``640``) or a sequence of ints
            (e.g. ``[640, 480]``).
        s: Stride every dimension has to be a multiple of.
        floor: Lower bound applied to every dimension after rounding.

    Returns:
        An int when ``imgsz`` is an int, otherwise a list of ints. A warning
        is printed when the size had to be changed.
    """
    if isinstance(imgsz, int):  # integer i.e. img_size=640
        new_size = max(make_divisible(imgsz, int(s)), floor)
        changed = new_size != imgsz
    else:  # list i.e. img_size=[640, 480]
        new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
        # Compare as lists so a tuple such as (640, 480) that is already valid
        # does not trigger a spurious warning (list != tuple is always True).
        changed = new_size != list(imgsz)
    if changed:
        print(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
    return new_size


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width); an int means a square target.
        color: Border colour used for the padding.
        auto: Pad only up to the next multiple of ``stride`` (minimum
            rectangle) instead of the full ``new_shape``.
        scaleFill: When ``auto`` is False, stretch to ``new_shape`` without
            padding.
        scaleup: Allow enlarging the image; when False it is only shrunk.
        stride: Stride used by the ``auto`` padding.

    Returns:
        ``(image, (w_ratio, h_ratio), (dw, dh))`` where ``dw``/``dh`` are the
        paddings applied to each side (already divided by two).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 makes an odd total padding split as floor/ceil over the two sides.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


class LoadImages:
    """Load a single image from disk and prepare it as network input.

    The original comment describes this as the YOLOv5 image/video dataloader
    (``python detect.py --source image.jpg/vid.mp4``); the code here only
    reads one still image with ``cv2.imread``.
    """

    def __init__(self, path, img_size=640, stride=32, auto=True):
        self.img_size = img_size
        self.stride = stride
        self.auto = auto
        self.path = path

    def images(self):
        """Read, letterbox and re-order the image.

        Returns:
            ``(path, img, img0)`` where ``img`` is the letterboxed image as a
            contiguous CHW/RGB array and ``img0`` is the untouched BGR image.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``self.path``.
        """
        img0 = cv2.imread(self.path)  # BGR
        if img0 is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of an AttributeError inside letterbox().
            raise FileNotFoundError(f'Image not found or unreadable: {self.path}')
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        return self.path, img, img0


def get_input_data(source, device, imgsz, stride=32, pt=False):
    """Load ``source`` and turn it into a normalised, batched tensor.

    Args:
        source: Path of the image file.
        device: Torch device the tensor is moved to.
        imgsz: Requested inference size; rounded up to a multiple of ``stride``.
        stride: Model stride.
        pt: Forwarded as ``auto`` to ``letterbox`` (minimum-rectangle padding).

    Returns:
        ``(path, img, im0s)`` where ``img`` is scaled to 0.0 - 1.0 with a
        leading batch dimension and ``im0s`` is the original BGR image.
    """
    imgsz = check_img_size(imgsz, s=stride)
    path, img, im0s = LoadImages(source, img_size=imgsz, stride=stride, auto=pt).images()
    print(path, img.shape, im0s.shape)
    img = torch.from_numpy(img).to(device)
    img = img / 255.0  # 0 - 255 to 0.0 - 1.0 (true division also converts uint8 to float)
    if len(img.shape) == 3:
        img = img[None]  # expand for batch dim
    return path, img, im0s


def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2].

    ``xy1`` is the top-left and ``xy2`` the bottom-right corner. Works on
    torch tensors and numpy arrays; the input is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
def box_iou(box1, box2):
    """Return the pairwise IoU matrix of two sets of xyxy boxes.

    Args:
        box1: Tensor of shape (N, 4) in [x1, y1, x2, y2] format.
        box2: Tensor of shape (M, 4) in [x1, y1, x2, y2] format.

    Returns:
        Tensor of shape (N, M) holding the IoU of every box1/box2 pair.
    """

    def box_area(box):
        # box is the transposed (4, n) view, so box[i] is one coordinate row.
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Run non-maximum suppression on raw model output.

    Args:
        prediction: Tensor indexed as (image, box, 5 + nc); per box the first
            four values are [cx, cy, w, h], index 4 is the objectness score
            and the remaining ``nc`` values are class scores.
        conf_thres: Confidence threshold in [0, 1].
        iou_thres: IoU threshold in [0, 1] passed to ``torchvision.ops.nms``.
        classes: Optional collection of class ids to keep.
        agnostic: When True, NMS is done across classes instead of per class.
        multi_label: Allow several labels per box (only if ``nc > 1``).
        labels: Optional per-image a-priori labels appended before NMS.
        max_det: Maximum number of detections kept per image.

    Returns:
        A list with one (n, 6) tensor per image: [x1, y1, x2, y2, conf, cls].

    Raises:
        AssertionError: If a threshold is outside [0, 1].
    """
    # Checks (done first so invalid thresholds are rejected before being used)
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # (pixels) maximum box width and height, used as per-class offset
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One distinct empty tensor per image (the former ``[tensor] * n`` aliased one object).
    output = [torch.zeros((0, 6), device=prediction.device) for _ in range(prediction.shape[0])]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: shifting boxes by class id * max_wh keeps classes from suppressing each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output


def clip_coords(boxes, shape):
    """Clip xyxy boxes in place to an image of ``shape`` (height, width).

    Accepts a torch tensor or a numpy array; nothing is returned.
    """
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy ``coords`` from ``img1_shape`` back to ``img0_shape``.

    Args:
        img1_shape: (height, width) of the letterboxed image the boxes refer to.
        coords: Boxes whose first four columns are [x1, y1, x2, y2]; modified
            in place.
        img0_shape: Shape of the original image; only height and width
            (indices 0 and 1) are used.
        ratio_pad: Optional ``((gain, ...), (pad_w, pad_h))``; when omitted,
            gain and padding are derived from the two shapes.

    Returns:
        The same ``coords`` object, rescaled and clipped to ``img0_shape``.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
def shows(pred, im0, img):
    """Draw the detections of ``pred`` on ``im0`` and display the result.

    Args:
        pred: 2-D detections whose first four columns are xyxy boxes given in
            the coordinate system of ``img``; the box columns are rescaled in
            place.
        im0: Original image the boxes are drawn on (modified in place).
        img: Network input; ``img.shape[2:]`` is used as the letterboxed
            (height, width).

    Blocks until a key is pressed in the OpenCV window.
    """
    pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], im0.shape).round()
    # pred is 2-D, so draw row by row (int() on a whole row would fail).
    for det in pred:
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        cv2.rectangle(im0, (x1, y1), (x2, y2), (255, 0, 0), 1)
    cv2.imshow("dfsd", im0)  # was `im0s`, which is not defined in this function
    cv2.waitKey(0)
def makedir():
    """Create the result folders (class 0, class 1 and "none") if missing."""
    for folder in ('./result/0/', './result/1/', './result/none/'):
        os.makedirs(folder, exist_ok=True)
def main(show, save, base_path=None, model_path=None):
    """Run the ONNX model on every image in ``base_path``.

    For each image the single best detection (``max_det=1``) is kept. It is
    optionally displayed and/or written to ``./result/<class>/`` (classes 0
    and 1 only). Images without any detection are written to
    ``./result/none/`` when ``save`` is set.

    Args:
        show: Display every annotated image and wait for a key press.
        save: Write annotated images below ``./result/``.
        base_path: Directory that contains the input images.
        model_path: Path of the ONNX model file.

    Returns:
        None. (Earlier versions returned ``"!!"`` and stopped at the first
        image without detections; all images are processed now.)
    """
    model = ONNXModel(model_path)
    imgsz = 320
    # The tensor is moved back to the CPU before inference anyway, so fall
    # back to the CPU instead of crashing on machines without CUDA.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    conf_thres, iou_thres, classes, agnostic_nms, max_det = 0.25, 0.45, None, False, 1

    if save:
        makedir()  # must exist before the first imwrite, including ./result/none/

    for name in os.listdir(base_path):
        if name == '.DS_Store':
            continue
        source = os.path.join(base_path, name)
        if not os.path.isfile(source):  # skip sub-directories
            continue
        print(source)

        path, img, im0s = get_input_data(source, device, imgsz)
        pred = model.forward(np.array(img.cpu()))[0]
        pred = torch.from_numpy(pred)
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
        file_name = os.path.basename(path)

        if pred.shape[0] == 0:
            print("not found any object")
            if save:
                # cv2.imwrite takes (filename, image); `cv2.write` does not exist.
                cv2.imwrite('./result/none/' + file_name, im0s)
            continue  # go on with the next image instead of aborting the run

        pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], im0s.shape).round()
        pred = np.array(pred.cpu())[0]  # best detection: x1, y1, x2, y2, conf, cls
        print(pred, type(int(pred[-1])), int(pred[-1]))

        x1, y1, x2, y2 = (int(v) for v in pred[:4])
        cls_id = int(pred[-1])

        if show or save:
            cv2.rectangle(im0s, (x1, y1), (x2, y2), (255, 0, 0), 1)
        if show:
            cv2.imshow("dfsd", im0s)
            cv2.waitKey(0)
        if save:
            cv2.putText(im0s, str(cls_id), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
            cv2.putText(im0s, str(pred[-2].round(2)), (x2, y2), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)
            if cls_id == 0:
                cv2.imwrite('./result/0/' + file_name, im0s)
            elif cls_id == 1:
                cv2.imwrite('./result/1/' + file_name, im0s)
def str2bool(value):
    """Convert a command-line string to a bool for ``argparse``.

    ``type=bool`` treats every non-empty string (including ``"False"``) as
    True, so the flags are parsed explicitly here. The empty string still
    maps to False, as it did with ``bool('')``.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='*.onnx', help='model path(s)')
    parser.add_argument('--source', type=str, default='/images/', help='data path(s)')
    parser.add_argument('--show', type=str2bool, default=False, help='show images  or not ')
    parser.add_argument('--save', type=str2bool, default=True, help='save images or not ')
    opt = parser.parse_args()
    main(opt.show, opt.save, opt.source, opt.model_path)

2、onnxruntime GPU 版本的前向推理测试（即上文导入的 load_onnx_model.py）

import onnx
import onnxruntime
import numpy as np
class ONNXModel():
    """Small wrapper around an ``onnxruntime.InferenceSession``."""

    def __init__(self, onnx_path, providers=None):
        """Create the session and cache the model's input/output names.

        :param onnx_path: path of the ONNX model file
        :param providers: optional list of execution providers in priority
            order; defaults to TensorRT, then CUDA, then CPU
        """
        if providers is None:
            providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
        self.onnx_session = onnxruntime.InferenceSession(onnx_path, providers=list(providers))
        self.input_name = self.get_input_name(self.onnx_session)
        self.output_name = self.get_output_name(self.onnx_session)
        print("input_name:{}".format(self.input_name))
        print("output_name:{}".format(self.output_name))

    def get_output_name(self, onnx_session):
        """Return the names of all model outputs.

        :param onnx_session: session to inspect
        :return: list of output names, in model order
        """
        return [node.name for node in onnx_session.get_outputs()]

    def get_input_name(self, onnx_session):
        """Return the names of all model inputs.

        :param onnx_session: session to inspect
        :return: list of input names, in model order
        """
        return [node.name for node in onnx_session.get_inputs()]

    def get_input_feed(self, input_name, image_tensor):
        """Build the feed dict, mapping every input name to ``image_tensor``.

        :param input_name: iterable of input names
        :param image_tensor: array fed to each of the inputs
        :return: dict ``{name: image_tensor}``
        """
        return {name: image_tensor for name in input_name}

    def forward(self, image_tensor):
        """Run inference on ``image_tensor``.

        The dtype of the input has to match what the model expects.

        :param image_tensor: input array, passed to every model input
        :return: whatever ``InferenceSession.run`` returns for all model
            outputs (callers in this file index it like a list)
        """
        # Equivalent ways of calling the session, for a single input name:
        #   self.onnx_session.run(None, {input_name: image_tensor})
        #   self.onnx_session.run(self.output_name, input_feed={input_name: image_tensor})
        input_feed = self.get_input_feed(self.input_name, image_tensor)
        # The number of returned arrays depends on the model that was loaded.
        return self.onnx_session.run(self.output_name, input_feed=input_feed)
if __name__ == '__main__':
    # Quick benchmark: model load time and average latency over 100 runs.
    import time

    runs = 100
    start_time = time.time()
    model_path = 'best.onnx'
    model = ONNXModel(model_path)
    load_model = time.time()

    input_data = np.random.randn(1, 3, 320, 320).astype(np.float32)
    outputs = None
    for _ in range(runs):
        outputs = model.forward(input_data)
    pred_time = (time.time() - load_model) / runs  # measured before any printing

    # Last output instead of the hard-coded index 3, which raised IndexError
    # for models that expose fewer than four outputs.
    print(outputs[-1].shape)
    print("load model time:%s,pred time:%s" % (load_model - start_time, pred_time))

14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理相关推荐

PyTorch主要组成模块 | 数据读入 | 数据预处理 | 模型构建 | 模型初始化 | 损失函数 | 优化器 | 训练与评估
文章目录一.深度学习任务框架二.数据读入三.数据预处理模块-transforms 1.数据预处理transforms模块机制 2.二十二种transforms数据预处理方法 1.裁剪 2. 翻转 ...
Python数据分析中数据预处理：编码将文字型数据转换为数值型
[小白从小学Python.C.Java] [Python-计算机等级考试二级] [Python-数据分析] Python数据分析中数据预处理:编码将文字型数据转换为数值型选择题对于以下pyth ...
MIKE水动力笔记7_实测数据与模型输出结果的拟合对比
本文目录前言 Step 1 拟合对比前的准备工作 Step 2 从模型输出结果dfsu文件提取出站位点处的模拟潮位dfs0文件 Step 3 将两个dfs0文件插进绘图板 Step 4 对图面进行必 ...
python数据预处理代码_Python中数据预处理（代码）
本篇文章给大家带来的内容是关于Python中数据预处理(代码),有一定的参考价值,有需要的朋友可以参考一下,希望对你有所帮助.1.导入标准库import numpy as np import matp ...
数据挖掘中数据预处理方法_数据挖掘中的数据预处理
数据挖掘中数据预处理方法 In the previous article, we have discussed the Data Exploration with which we have star ...
PTMs：QLoRA技巧之源码解读(qlora.py文件)—解析命令与加载参数→数据预处理→模型训练+评估+推理
PTMs:QLoRA技巧之源码解读(qlora.py文件)-解析命令与加载参数→数据预处理→模型训练+评估+推理目录 QLoRA技巧之源码解读(qlora.py文件)-解析命令与加载参数→数据预处理 ...
MMDetection2.17-权重模型转推理模型(pth转onnx)详细步骤及前向推理(Win10、Linux均适用)
权重模型转推理模型的意义? 换后的ONNX模型注意要点: 转换流程及完整代码: 前向推理及完整代码: 权重模型转推理模型的意义? 方便部署:转为onnx格式的模型后,就可以不需要依赖mmdetecti ...
电影推荐系统（数据预处理+模型训练+预测）
博客源地址电影推荐思路利用doc2vec做电影推荐,其实核心就是比较两部电影介绍文本之间的向量相似程度.自然语言处理中的分布式假设提出了"某个单词的含义由它周围的单词形成" ...
机器学习中数据预处理方法
在知乎上也看到了这个,不知道哪个是原创,这里粘上链接 https://zhuanlan.zhihu.com/p/51131210 前言数据预处理的重要性? 熟悉数据挖掘和机器学习的小伙伴们都知道,数 ...

14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理

14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理相关推荐

最新文章

热门文章