Final result

Feed an image into the model --> the model detects the objects and draws their bounding boxes.

Code locations

inference.py: forward-inference code

# -------------------------------------#
#       Build the YOLO inference class
# -------------------------------------#
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import cv2
import numpy as np
import colorsys
import torch
import torch.nn as nn
from yolo4 import YoloBody
from utils.utils import *
from yolo_layer import *

# --------------------------------------------#
#   To predict with your own trained model,
#   two parameters must be changed:
#   both model_path and classes_path!
# --------------------------------------------#
class Inference(object):
    # ---------------------------------------------------#
    #   Initialize the model and parameters and load
    #   the trained weights
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.model_path = kwargs['model_path']
        self.anchors_path = kwargs['anchors_path']
        self.classes_path = kwargs['classes_path']
        self.model_image_size = kwargs['model_image_size']
        self.confidence = kwargs['confidence']
        self.cuda = kwargs['cuda']
        self.class_names = self.get_class()   # class_names = ['person', 'bicycle', 'car', ..., 'toothbrush']
        self.anchors = self.get_anchors()     # anchors = [12.0, 16.0, 19.0, 36.0, ..., 401]
        print(self.anchors)

        # .eval() disables BatchNorm updates and Dropout; YoloBody spans the
        # network from the input all the way to the three final conv heads
        self.net = YoloBody(3, len(self.class_names)).eval()
        self.load_model_pth(self.net, self.model_path)

        if self.cuda:
            self.net = self.net.cuda()
            self.net.eval()
        print('Finished!')

        self.yolo_decodes = []
        anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        for i in range(3):
            head = YoloLayer(self.model_image_size, anchor_masks, len(self.class_names),
                             self.anchors, len(self.anchors) // 2).eval()
            self.yolo_decodes.append(head)

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def load_model_pth(self, model, pth):
        print('Loading weights into state dict, name: %s' % (pth))
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_dict = model.state_dict()
        pretrained_dict = torch.load(pth, map_location=device)  # map_location switches between GPU and CPU
        matched_dict = {}

        for k, v in pretrained_dict.items():  # k is the layer name, v its parameters
            if np.shape(model_dict[k]) == np.shape(v):
                matched_dict[k] = v
            else:
                print('un matched layers: %s' % k)
        print(len(model_dict.keys()), len(pretrained_dict.keys()))
        print('%d layers matched,  %d layers missed' % (
            len(matched_dict.keys()), len(model_dict) - len(matched_dict.keys())))
        model_dict.update(matched_dict)     # merge the matched weights into model_dict
        model.load_state_dict(model_dict)   # load the merged dict (not pretrained_dict, which may not match)
        print('Finished!')
        return model

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def get_class(self):
        classes_path = os.path.expanduser(self.classes_path)  # resolve to an absolute path
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   Read all anchor boxes
    # ---------------------------------------------------#
    def get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return anchors
        # return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image_src):
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (608, 608))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))
        images = np.asarray([img])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)  # forward pass through YoloBody; returns the three head outputs

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        print(output.shape)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.1)
        boxes = [box.cpu().numpy() for box in batch_detections]
        print(boxes[0])
        return boxes[0]


if __name__ == '__main__':
    params = {
        "model_path": 'pth/yolo4_weights_my.pth',
        "anchors_path": 'work_dir/yolo_anchors_coco.txt',
        "classes_path": 'work_dir/coco_classes.txt',
        "model_image_size": (608, 608, 3),
        "confidence": 0.4,
        "cuda": True
    }

    model = Inference(**params)
    class_names = load_class_names(params['classes_path'])
    image_src = cv2.imread('dog.jpg')
    boxes = model.detect_image(image_src)
    plot_boxes_cv2(image_src, boxes, savename='output3.jpg', class_names=class_names)
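
To run the same pipeline over a whole folder instead of the single dog.jpg, here is a minimal sketch reusing the Inference class above (the imgs/ input directory and the det_ output prefix are assumptions, not part of the original script):

import os
import cv2

model = Inference(**params)                      # params as defined in the __main__ above
class_names = load_class_names(params['classes_path'])

for name in sorted(os.listdir('imgs')):          # 'imgs/' is an assumed input directory
    if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_src = cv2.imread(os.path.join('imgs', name))
    if image_src is None:                        # unreadable file, skip it
        continue
    boxes = model.detect_image(image_src)        # boxes are normalized to 0~1
    plot_boxes_cv2(image_src, boxes, savename=os.path.join('imgs', 'det_' + name),
                   class_names=class_names)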


dataloader: functions related to training

from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from utils.utils import bbox_iou, merge_bboxes
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
# from nets.yolo_training import Generator
import cv2


class TestDataset(Dataset):
    def __init__(self, lines, image_size):
        super(TestDataset, self).__init__()
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def __len__(self):
        return self.test_batches

    def __getitem__(self, index):
        one_line = self.test_lines[index]
        line = one_line.split()
        image_src = cv2.imread(line[0])
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))
        return image_src, img, y, [h, w, line[0]]


class TrainDataset(Dataset):
    def __init__(self, train_lines, image_size, mosaic=True):
        super(TrainDataset, self).__init__()
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.mosaic = mosaic
        self.flag = True

    def __len__(self):
        return self.train_batches

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random preprocessing for real-time data augmentation."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # Rescale the image
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste the image at a random offset
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h),
                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Flip the image or not
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Color-space (HSV) jitter
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Adjust the ground-truth boxes to the augmented image
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep only valid boxes
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        if len(box) == 0:
            return image_data, []

        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        h, w = input_shape
        min_offset_x = 0.3
        min_offset_y = 0.3
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line
            line_content = line.split()
            # Open the image
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size
            iw, ih = image.size
            # Ground-truth boxes
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Flip the image or not
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Rescale the input image
            new_ar = w / h
            scale = self.rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Color-space (HSV) jitter
            hue = self.rand(-hue, hue)
            sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
            val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            x[..., 0][x[..., 0] > 1] -= 1
            x[..., 0][x[..., 0] < 0] += 1
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste the image at its slot among the four mosaic tiles
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h),
                                  (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes onto the mosaic canvas
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Cut the four tiles and stitch them together
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Merge the boxes across the cut lines
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def __getitem__(self, index):
        if index == 0:
            shuffle(self.train_lines)
        lines = self.train_lines
        n = self.train_batches
        index = index % n
        if self.mosaic:
            if self.flag and (index + 4) < n:
                img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2])
            else:
                img, y = self.get_random_data(lines[index], self.image_size[0:2])
            self.flag = bool(1 - self.flag)
        else:
            img, y = self.get_random_data(lines[index], self.image_size[0:2])

        if len(y) != 0:
            # Convert pixel coordinates into 0~1 fractions of the image size
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / self.image_size[1]
            boxes[:, 1] = boxes[:, 1] / self.image_size[0]
            boxes[:, 2] = boxes[:, 2] / self.image_size[1]
            boxes[:, 3] = boxes[:, 3] / self.image_size[0]

            boxes = np.maximum(np.minimum(boxes, 1), 0)
            # (x1, y1, x2, y2) -> (cx, cy, w, h)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
            boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
            y = np.concatenate([boxes, y[:, -1:]], axis=-1)

        img = np.array(img, dtype=np.float32)
        tmp_inp = np.transpose(img / 255.0, (2, 0, 1))
        tmp_targets = np.array(y, dtype=np.float32)
        return tmp_inp, tmp_targets


# collate_fn used by the DataLoader
def train_dataset_collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = np.array(images)
    bboxes = np.array(bboxes)
    return images, bboxes


def test_dataset_collate(batch):
    srcs = []
    inputs = []
    targets = []
    shapes = []
    for img_src, img, labels, infos in batch:
        srcs.append(img_src)
        inputs.append(img)
        targets.append(labels)
        shapes.append(infos)
    inputs = np.array(inputs, dtype=np.float32)
    return srcs, inputs, targets, shapes
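
The two collate functions above are meant to be plugged into a torch DataLoader. A minimal wiring sketch, assuming an annotation file work_dir/train_annotations.txt with one line per image in the format "path x1,y1,x2,y2,cls ..." (the file name and batch size are assumptions):

import torch
from torch.utils.data import DataLoader

with open('work_dir/train_annotations.txt') as f:    # assumed annotation file
    train_lines = f.readlines()

train_dataset = TrainDataset(train_lines, image_size=(608, 608, 3), mosaic=True)
# shuffle=False because TrainDataset shuffles its own lines when index == 0
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=False,
                          num_workers=0, collate_fn=train_dataset_collate)

for images, targets in train_loader:
    images = torch.from_numpy(images)                # (4, 3, 608, 608) float32 tensor
    print(images.shape, [t.shape for t in targets])
    break                                            # one batch is enough for a smoke test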

generator: functions related to training

import time
from PIL import Image
import numpy as np
import cv2
from random import shuffle
from utils.utils import merge_bboxes


def rand(a=0, b=1):
    return np.random.rand() * (b - a) + a


class TrainGenerator(object):
    def __init__(self, batch_size, train_lines, image_size):
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.test_time = time.time()

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random preprocessing for real-time data augmentation."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # resize image
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # place image
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # flip image or not
        flip = rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # distort image
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # correct boxes
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        if len(box) == 0:
            return image_data, []

        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        """Random mosaic preprocessing for real-time data augmentation."""
        h, w = input_shape
        min_offset_x = 0.4
        min_offset_y = 0.4
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line
            line_content = line.split()
            # Open the image
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size
            iw, ih = image.size
            # Ground-truth boxes
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Flip the image or not
            flip = rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Rescale the input image
            new_ar = w / h
            scale = rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Color-space (HSV) jitter
            hue = rand(-hue, hue)
            sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
            val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            x[..., 0][x[..., 0] > 1] -= 1
            x[..., 0][x[..., 0] < 0] += 1
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste the image at its slot among the four mosaic tiles
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes onto the mosaic canvas
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Cut the four tiles and stitch them together
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Merge the boxes across the cut lines
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def generate(self, train=True, mosaic=True):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            inputs = []
            targets = []
            flag = True
            n = len(lines)
            for i in range(len(lines)):
                if mosaic == True:
                    if flag and (i + 4) < n:
                        img, y = self.get_random_data_with_Mosaic(lines[i:i + 4], self.image_size[0:2])
                        i = (i + 4) % n
                    else:
                        img, y = self.get_random_data(lines[i], self.image_size[0:2])
                        i = (i + 1) % n
                    flag = bool(1 - flag)
                else:
                    img, y = self.get_random_data(lines[i], self.image_size[0:2])
                    i = (i + 1) % n

                if len(y) != 0:
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]

                    boxes = np.maximum(np.minimum(boxes, 1), 0)
                    boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                    boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                    boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
                    boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
                    y = np.concatenate([boxes, y[:, -1:]], axis=-1)

                img = np.array(img, dtype=np.float32)
                inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
                targets.append(np.array(y, dtype=np.float32))
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    # print('data load use time:', time.time()-self.test_time)
                    # self.test_time = time.time()
                    yield tmp_inp, tmp_targets


class TestGenerator(object):
    def __init__(self, batch_size, lines, image_size):
        self.batch_size = batch_size
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def generate(self):
        lines = self.test_lines
        inputs = []
        targets = []
        shapes = []
        for one_line in lines:
            print(one_line)
            line = one_line.split()
            image_src = cv2.imread(line[0])
            h, w, _ = image_src.shape
            image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

            img = np.array(image, dtype=np.float32)
            inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
            targets.append(y)
            shapes.append([h, w, line[0]])
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = targets
                tmp_shapes = shapes
                inputs = []
                targets = []
                shapes = []
                yield tmp_inp, tmp_targets, tmp_shapes
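
A minimal sketch of consuming TrainGenerator in a training loop (the annotation file path and batch size are assumptions; note that generate() loops forever, so the consumer decides when to stop):

import torch

with open('work_dir/train_annotations.txt') as f:    # assumed annotation file
    train_lines = f.readlines()

gen = TrainGenerator(batch_size=4, train_lines=train_lines,
                     image_size=(608, 608)).generate(train=True, mosaic=True)

for step in range(2):                                # take two batches as a smoke test
    batch_images, batch_targets = next(gen)
    images = torch.from_numpy(batch_images)          # (4, 3, 608, 608)
    targets = [torch.from_numpy(t) for t in batch_targets]
    print(step, images.shape, len(targets))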

utils.py: functions related to forward inference

Box drawing function: plot_boxes_cv2
Class-name loading function: load_class_names
IoU computation functions: bbox_iou1 / bbox_iou
Non-maximum suppression function: non_max_suppression
from __future__ import division
import torch
import numpy as np
import math
import cv2


def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
    img = np.copy(img)
    colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)

    def get_color(c, x, max_val):
        ratio = float(x) / max_val * 5
        i = int(math.floor(ratio))
        j = int(math.ceil(ratio))
        ratio = ratio - i
        r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
        return int(r * 255)

    width = img.shape[1]
    height = img.shape[0]
    for i in range(len(boxes)):
        box = boxes[i]
        x1 = int(box[0] * width)    # fractional position * original width  -> x coordinate in the original image
        y1 = int(box[1] * height)   # fractional position * original height -> y coordinate in the original image
        x2 = int(box[2] * width)
        y2 = int(box[3] * height)

        if color:
            rgb = color
        else:
            rgb = (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            # print('%s: %f' % (class_names[cls_id], cls_conf))
            classes = len(class_names)
            offset = cls_id * 123457 % classes
            red = get_color(2, offset, classes)
            green = get_color(1, offset, classes)
            blue = get_color(0, offset, classes)
            if color is None:
                rgb = (red, green, blue)
            img = cv2.putText(img, class_names[int(cls_id)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 2)
        img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 3)
    if savename:
        print("save plot results to %s" % savename)
        cv2.imwrite(savename, img)
    return img


def load_class_names(namesfile):
    class_names = []
    with open(namesfile, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.rstrip()
        class_names.append(line)
    return class_names


def bbox_iou1(box1, box2, x1y1x2y2=True):
    # print('iou box1:', box1)
    # print('iou box2:', box2)
    if x1y1x2y2:
        mx = min(box1[0], box2[0])
        Mx = max(box1[2], box2[2])
        my = min(box1[1], box2[1])
        My = max(box1[3], box2[3])
        w1 = box1[2] - box1[0]
        h1 = box1[3] - box1[1]
        w2 = box2[2] - box2[0]
        h2 = box2[3] - box2[1]
    else:
        w1 = box1[2]
        h1 = box1[3]
        w2 = box2[2]
        h2 = box2[3]
        mx = min(box1[0], box2[0])
        Mx = max(box1[0] + w1, box2[0] + w2)
        my = min(box1[1], box2[1])
        My = max(box1[1] + h1, box2[1] + h2)
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    if cw <= 0 or ch <= 0:
        return 0.0

    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    uarea = area1 + area2 - carea
    return carea / uarea


def bbox_iou(box1, box2, x1y1x2y2=True):
    """Compute the IoU between two sets of boxes."""
    if not x1y1x2y2:
        # convert (cx, cy, w, h) to corner coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1e-3, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1 + 1e-3, min=0)
    b1_area = (b1_x2 - b1_x1 + 1e-3) * (b1_y2 - b1_y1 + 1e-3)
    b2_area = (b2_x2 - b2_x1 + 1e-3) * (b2_y2 - b2_y1 + 1e-3)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
    return iou


def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    # Convert (cx, cy, w, h) to top-left / bottom-right corners
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # First round of filtering on objectness confidence
        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
        image_pred = image_pred[conf_mask]

        if not image_pred.size(0):
            continue

        # Best class and its confidence
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        # detections holds (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        # Classes present among the detections
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()

        for c in unique_labels:
            # All pre-filtered predictions of this class
            detections_class = detections[detections[:, -1] == c]
            # Sort by objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Keep the highest-confidence box, then drop every remaining box
                # whose overlap with it exceeds nms_thres
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                detections_class = detections_class[1:][ious < nms_thres]
            # Stack the kept detections
            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = max_detections if output[image_i] is None else torch.cat(
                (output[image_i], max_detections))

    return output


def merge_bboxes(bboxes, cutx, cuty):
    merge_bbox = []
    for i in range(len(bboxes)):
        for box in bboxes[i]:
            tmp_box = []
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

            if i == 0:  # top-left tile
                if y1 > cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 1:  # bottom-left tile
                if y2 < cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 2:  # bottom-right tile
                if y2 < cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 3:  # top-right tile
                if y1 > cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx
                    if x2 - x1 < 5:
                        continue

            tmp_box.append(x1)
            tmp_box.append(y1)
            tmp_box.append(x2)
            tmp_box.append(y2)
            tmp_box.append(box[-1])
            merge_bbox.append(tmp_box)
    return merge_bbox
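
A quick sanity check of bbox_iou and non_max_suppression on hand-made tensors (the numbers are purely illustrative):

import torch

a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
b = torch.tensor([[5.0, 5.0, 15.0, 15.0]])
print(bbox_iou(a, b))   # overlap 5x5=25, union 100+100-25=175 -> IoU ~= 0.143

# Two almost-identical high-confidence predictions of the same class in
# (cx, cy, w, h, obj_conf, class0_score) layout; NMS keeps only one of them.
pred = torch.tensor([[[5.0, 5.0, 10.0, 10.0, 0.9, 0.8],
                      [5.5, 5.0, 10.0, 10.0, 0.8, 0.7]]])
kept = non_max_suppression(pred, num_classes=1, conf_thres=0.5, nms_thres=0.4)
print(kept[0])          # a single (x1, y1, x2, y2, obj_conf, class_conf, class_pred) row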
