14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理
1、数据输入预处理、模型输出nms等
import argparse
import math
import os
import time

import cv2
import numpy as np
import torch
import torchvision

from load_onnx_model import *
def make_divisible(x, divisor):
    """Return the smallest multiple of ``divisor`` that is >= ``x``."""
    return math.ceil(x / divisor) * divisor


def check_img_size(imgsz, s=32, floor=0):
    """Round an image size up to a multiple of stride ``s`` in each dimension.

    Args:
        imgsz: An int (e.g. 640) or a sequence of ints (e.g. [640, 480]).
        s: Stride the size must be divisible by.
        floor: Lower bound applied to every resulting dimension.

    Returns:
        An int when ``imgsz`` is an int, otherwise a list of ints. A warning
        is printed whenever the result differs from the input.
    """
    if isinstance(imgsz, int):  # integer, i.e. img_size=640
        new_size = max(make_divisible(imgsz, int(s)), floor)
    else:  # list, i.e. img_size=[640, 480]
        new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
    if new_size != imgsz:
        print(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
    return new_size


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or a single int for a square.
        color: Border colour used for the padding.
        auto: Pad only up to the next multiple of ``stride`` (minimum
            rectangle) instead of padding all the way to ``new_shape``.
        scaleFill: When ``auto`` is false, stretch to ``new_shape`` with no
            padding.
        scaleup: Allow enlarging the image; when false it is only shrunk.
        stride: Stride used by the ``auto`` padding mode.

    Returns:
        ``(image, ratio, (dw, dh))`` where ``ratio`` is the (width, height)
        scale factor and ``dw``/``dh`` are the padding added on each side.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd pixel of padding to bottom/right.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


class LoadImages:
    """Load one image from disk and prepare it as a letterboxed CHW array."""

    def __init__(self, path, img_size=640, stride=32, auto=True):
        self.img_size = img_size
        self.stride = stride
        self.auto = auto
        self.path = path

    def images(self):
        """Read ``self.path`` and return ``(path, img, img0)``.

        ``img0`` is the image exactly as read by OpenCV (BGR); ``img`` is the
        letterboxed copy converted to a contiguous CHW, RGB array.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        img0 = cv2.imread(self.path)  # BGR
        if img0 is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Image Not Found {self.path}')
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        return self.path, img, img0


def get_input_data(source, device, imgsz, stride=32, pt=False):
    """Load ``source`` and turn it into a normalised, batched torch tensor.

    Args:
        source: Path of the image to read.
        device: Torch device the tensor is moved to.
        imgsz: Requested inference size; rounded up to a multiple of
            ``stride`` by ``check_img_size``.
        stride: Model stride.
        pt: Forwarded to ``letterbox`` as its ``auto`` flag.

    Returns:
        ``(path, img, im0s)`` where ``img`` is scaled to the 0.0-1.0 range
        with a leading batch dimension and ``im0s`` is the image as read.
    """
    imgsz = check_img_size(imgsz, s=stride)
    path, img, im0s = LoadImages(source, img_size=imgsz, stride=stride, auto=pt).images()
    print(path, img.shape, im0s.shape)
    img = torch.from_numpy(img).to(device)
    img = img / 255.0  # 0 - 255 to 0.0 - 1.0
    if len(img.shape) == 3:
        img = img[None]  # expand for batch dim
    return path, img, im0s


def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2].

    (x, y) is the box centre; (x1, y1) is the top-left corner and (x2, y2)
    the bottom-right corner. Accepts a torch tensor or a numpy array and
    returns a new object of the same kind.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
def box_iou(box1, box2):
    """Return the pairwise IoU matrix between two sets of xyxy boxes.

    Args:
        box1: Tensor of shape (N, 4).
        box2: Tensor of shape (M, 4).

    Returns:
        Tensor of shape (N, M) holding the IoU of every box1/box2 pair.
    """

    def box_area(box):
        # box is 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Run Non-Maximum Suppression (NMS) on raw model output.

    Args:
        prediction: Tensor indexed as (image, box, 5 + nc) where each row is
            [cx, cy, w, h, obj_conf, cls_conf...].
        conf_thres: Confidence threshold in [0, 1].
        iou_thres: IoU threshold in [0, 1] passed to ``torchvision.ops.nms``.
        classes: Optional list of class ids to keep.
        agnostic: When true, boxes of different classes suppress each other.
        multi_label: Allow several labels per box (only if nc > 1).
        labels: Optional per-image apriori labels appended to the candidates.
        max_det: Maximum number of detections kept per image.

    Returns:
        A list with one (n, 6) tensor per image: [x1, y1, x2, y2, conf, cls].
        Images without detections keep an empty (0, 6) tensor.
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    max_wh = 4096  # (pixels) maximum box width and height, used as class offset
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box
    merge = False  # use merge-NMS

    t = time.time()
    # The same empty tensor is shared by all slots; slots are only ever
    # replaced, never mutated in place.
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # keep rows above the objectness threshold

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: offsetting boxes by class keeps classes from
        # suppressing each other unless ``agnostic`` is set.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output


def clip_coords(boxes, shape):
    """Clip xyxy boxes in place to an image ``shape`` given as (height, width).

    Works on torch tensors and numpy arrays; returns ``None``.
    """
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy coords from ``img1_shape`` back to ``img0_shape``.

    Args:
        img1_shape: (height, width) of the letterboxed image the coords
            currently refer to.
        coords: Boxes whose first four columns are x1, y1, x2, y2.
            Modified in place.
        img0_shape: Shape of the original image; its first two entries are
            (height, width).
        ratio_pad: Optional ``((gain, ...), (pad_w, pad_h))``; when omitted
            both values are derived from the two shapes.

    Returns:
        The same ``coords`` object, with padding removed, divided by the
        gain and clipped to ``img0_shape``.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
def shows(pred, im0, img):
    """Draw the detections in ``pred`` on ``im0`` and display the result.

    Args:
        pred: Detections with one row per box; the first four columns are
            xyxy coordinates relative to ``img``. Rescaled in place.
        im0: Original image the boxes are drawn on (modified in place).
        img: Batched model input; ``img.shape[2:]`` is used as the
            letterboxed (height, width).

    Blocks until a key is pressed in the OpenCV window.
    """
    pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], im0.shape).round()
    # pred is 2-D, so each row is unpacked separately; calling int() on a
    # whole row would fail.
    for det in pred:
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        cv2.rectangle(im0, (x1, y1), (x2, y2), (255, 0, 0), 1)
    # Show the image that was drawn on (the original referenced an
    # undefined name here).
    cv2.imshow("dfsd", im0)
    cv2.waitKey(0)
def makedir():
    """Create the ``./result/0``, ``./result/1`` and ``./result/none`` folders.

    Existing directories are left untouched.
    """
    for sub_dir in ('0', '1', 'none'):
        os.makedirs('./result/' + sub_dir + '/', exist_ok=True)
def main(show, save, base_path=None, model_path=None):
    """Run the ONNX detector on every file in ``base_path``.

    For each image the top detection (``max_det=1``) is printed and, depending
    on the flags, displayed and/or written below ``./result/``.

    Args:
        show: Display each annotated image in an OpenCV window.
        save: Write annotated images to ``./result/<class>/`` (classes 0 and
            1 only) and images without detections to ``./result/none/``.
        base_path: Directory containing the input images.
        model_path: Path of the ONNX model file.

    Note:
        An image without detections no longer aborts the run with
        ``return "!!"``; it is reported and the loop moves on to the next
        file, so the function always returns ``None``.
    """
    model = ONNXModel(model_path)
    imgsz = 320
    # Fall back to CPU so the pre-processing also works without CUDA.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    conf_thres, iou_thres, classes, agnostic_nms, max_det = 0.25, 0.45, None, False, 1

    # Create the output folders up front so every later write has a target.
    makedir()

    for name in os.listdir(base_path):
        if name == '.DS_Store':
            continue
        source = os.path.join(base_path, name)
        print(source)

        path, img, im0s = get_input_data(source, device, imgsz)
        pred = torch.from_numpy(model.forward(np.array(img.cpu()))[0])
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
        file_name = os.path.basename(path)

        if pred.shape[0] == 0:
            print("not found any object")
            if save:
                cv2.imwrite('./result/none/' + file_name, im0s)
            continue

        pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], im0s.shape).round()
        det = np.array(pred.cpu())[0]  # [x1, y1, x2, y2, conf, cls] of the kept box
        cls_id = int(det[-1])
        print(det, type(cls_id), cls_id)

        top_left = (int(det[0]), int(det[1]))
        bottom_right = (int(det[2]), int(det[3]))
        if show or save:
            cv2.rectangle(im0s, top_left, bottom_right, (255, 0, 0), 1)
        if show:
            cv2.imshow("dfsd", im0s)
            cv2.waitKey(0)
        if save:
            cv2.putText(im0s, str(cls_id), top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (255, 0, 255), 2)
            cv2.putText(im0s, str(det[-2].round(2)), bottom_right, cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                        (0, 0, 255), 1)
            if cls_id == 0:
                cv2.imwrite('./result/0/' + file_name, im0s)
            elif cls_id == 1:
                cv2.imwrite('./result/1/' + file_name, im0s)
def str2bool(value):
    """Parse a command-line boolean such as ``True``/``false``/``1``/``no``.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    ``"False"``) as true, so the flags need an explicit converter.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='*.onnx', help='model path(s)')
    parser.add_argument('--source', type=str, default='/images/', help='data path(s)')
    parser.add_argument('--show', type=str2bool, default=False, help='show images or not ')
    parser.add_argument('--save', type=str2bool, default=True, help='save images or not ')
    opt = parser.parse_args()
    main(opt.show, opt.save, opt.source, opt.model_path)
2、onnxruntime GPU 版本的前向推理测试(保存为 load_onnx_model.py,供上面的脚本导入)
import onnx
import onnxruntime
import numpy as np
class ONNXModel():
    """Thin wrapper around an ``onnxruntime.InferenceSession``."""

    def __init__(self, onnx_path):
        """Create the session and cache the model's input and output names.

        :param onnx_path: path of the ONNX model file
        """
        preferred = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
        # Request only providers this onnxruntime build offers, keeping the
        # preferred order (TensorRT, then CUDA, then CPU).
        available = onnxruntime.get_available_providers()
        providers = [p for p in preferred if p in available] or available
        self.onnx_session = onnxruntime.InferenceSession(onnx_path, providers=providers)
        self.input_name = self.get_input_name(self.onnx_session)
        self.output_name = self.get_output_name(self.onnx_session)
        print("input_name:{}".format(self.input_name))
        print("output_name:{}".format(self.output_name))

    def get_output_name(self, onnx_session):
        """Return the names of all session outputs as a list.

        :param onnx_session: object exposing ``get_outputs()``
        :return: list of output names, in session order
        """
        output_name = []
        for node in onnx_session.get_outputs():
            output_name.append(node.name)
        return output_name

    def get_input_name(self, onnx_session):
        """Return the names of all session inputs as a list.

        :param onnx_session: object exposing ``get_inputs()``
        :return: list of input names, in session order
        """
        input_name = []
        for node in onnx_session.get_inputs():
            input_name.append(node.name)
        return input_name

    def get_input_feed(self, input_name, image_tensor):
        """Build the feed dict, mapping every input name to ``image_tensor``.

        :param input_name: iterable of input names
        :param image_tensor: value fed to each input
        :return: dict ``{name: image_tensor}``
        """
        input_feed = {}
        for name in input_name:
            input_feed[name] = image_tensor
        return input_feed

    def forward(self, image_tensor):
        """Run the model on ``image_tensor``.

        The dtype of the input must match what the model expects.

        :param image_tensor: input array fed to every model input
        :return: list with one entry per model output, in the order of
            ``self.output_name``
        """
        input_feed = self.get_input_feed(self.input_name, image_tensor)
        # The number of returned arrays depends on the model's outputs.
        scores = self.onnx_session.run(self.output_name, input_feed=input_feed)
        return scores
if __name__ == '__main__':
    # Quick benchmark: time model loading and the average forward pass.
    import time

    start_time = time.time()
    model_path = 'best.onnx'
    abc = ONNXModel(model_path)
    load_model = time.time()

    input_data = np.random.randn(1, 3, 320, 320).astype(np.float32)
    n_runs = 100
    for _ in range(n_runs):
        abce = abc.forward(input_data)
    pred_time = (time.time() - load_model) / n_runs

    # Report every output instead of indexing a fixed position, which fails
    # for models with fewer outputs; printing happens outside the timed loop.
    for idx, out in enumerate(abce):
        print(idx, out.shape)
    print("load model time:%s,pred time:%s" % (load_model - start_time, pred_time))
14、yolov5-6中数据预处理、模型输出nms单独计算、onnxruntime的gpu版本前向推理相关推荐
- PyTorch主要组成模块 | 数据读入 | 数据预处理 | 模型构建 | 模型初始化 | 损失函数 | 优化器 | 训练与评估
文章目录 一.深度学习任务框架 二.数据读入 三.数据预处理模块-transforms 1.数据预处理transforms模块机制 2.二十二种transforms数据预处理方法 1.裁剪 2. 翻转 ...
- Python数据分析中数据预处理:编码将文字型数据转换为数值型
[小白从小学Python.C.Java] [Python-计算机等级考试二级] [Python-数据分析] Python数据分析中 数据预处理:编码 将文字型数据转换为数值型 选择题 对于以下pyth ...
- MIKE水动力笔记7_实测数据与模型输出结果的拟合对比
本文目录 前言 Step 1 拟合对比前的准备工作 Step 2 从模型输出结果dfsu文件提取出站位点处的模拟潮位dfs0文件 Step 3 将两个dfs0文件插进绘图板 Step 4 对图面进行必 ...
- python数据预处理代码_Python中数据预处理(代码)
本篇文章给大家带来的内容是关于Python中数据预处理(代码),有一定的参考价值,有需要的朋友可以参考一下,希望对你有所帮助.1.导入标准库import numpy as np import matp ...
- 数据挖掘中数据预处理方法_数据挖掘中的数据预处理
数据挖掘中数据预处理方法 In the previous article, we have discussed the Data Exploration with which we have star ...
- PTMs:QLoRA技巧之源码解读(qlora.py文件)—解析命令与加载参数→数据预处理→模型训练+评估+推理
PTMs:QLoRA技巧之源码解读(qlora.py文件)-解析命令与加载参数→数据预处理→模型训练+评估+推理 目录 QLoRA技巧之源码解读(qlora.py文件)-解析命令与加载参数→数据预处理 ...
- MMDetection2.17-权重模型转推理模型(pth转onnx)详细步骤及前向推理(Win10、Linux均适用)
权重模型转推理模型的意义? 换后的ONNX模型注意要点: 转换流程及完整代码: 前向推理及完整代码: 权重模型转推理模型的意义? 方便部署:转为onnx格式的模型后,就可以不需要依赖mmdetecti ...
- 电影推荐系统(数据预处理+模型训练+预测)
博客源地址 电影推荐思路 利用doc2vec做电影推荐,其实核心就是比较两部电影介绍文本之间的向量相似程度.自然语言处理中的分布式假设提出了"某个单词的含义由它周围的单词形成" ...
- 机器学习中数据预处理方法
在知乎上也看到了这个,不知道哪个是原创,这里粘上链接 https://zhuanlan.zhihu.com/p/51131210 前言 数据预处理的重要性? 熟悉数据挖掘和机器学习的小伙伴们都知道,数 ...
最新文章
- 自然语言推理和数据集
- java 编写方法和属性,编写一个关于教师的JAVA-BEAN,要求实现其基本属性如年龄,性别等的get和set方法...
- 《从零开始学Swift》学习笔记(Day 20)——函数中参数的传递引用
- python和c有什么区别_c 跟 python的区别有哪些
- python的创始人、特点应用领域_python学习笔记(python发展介绍)
- 需要多长时间达到一个本科毕业生刚毕业的水平。
- 如何查看linux服务器的白名单,linux服务器iptables防火墙白名单添加方式
- section怎么制造图框_cad中如何制作带属性块的图框 - CAD自学网
- H264--4--H264编码[7]
- 如何快速学会java编程?
- 尽量不要在viewWillDisappear:方法中移除通知
- java基本类型引用类型_Java基本类型和引用类型
- 深入PHP内核之ZVAL
- P. Laguna/Evaluation of an Automatic Threshold Based Detector of Waveform Limits in Holter ECG
- SQL Server查询优化
- 【资源】机器学习资源积累(积累中...)
- HBuilder打包
- tk域名ml域名ga域名cf域名免费顶级域名获取及域名解析绑定IP发布网站
- POJ 1436.Horizontally Visible Segments-线段树(区间更新、端点放大2倍)
- 蓝牙基带数据传输机理分析
热门文章
- Google测试工师的一道面试题目
- 基本数据类型 java
- EEGLAB系列教程5:数据预处理2(ICA去伪迹)
- Android 6.0 新特性和功能,系统和 API 行为更新
- 【鬼畜】UVA - 401每日一题·猛男就是要暴力打表
- JS总结——获取元素的各种高度宽度
- linux下网卡参数配置,linux网卡配置参数
- DL之CNN(paper):关于CNN(卷积神经网络)经典论文原文(1950~2018)简介、下载地址大全(非常有价值)之持续更新(吐血整理)
- caffe学习笔记20-BatchNorm层分析
- 【记录踩坑】配置本地访问远程Linux系统服务器的jupyter notebook