Final result

Feed an image into the model --> the model detects the objects and draws their bounding boxes.

Code locations

inference.py: forward-inference code

# -------------------------------------#
#       Build the YOLO inference class
# -------------------------------------#
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import cv2
import numpy as np
import colorsys
import torch
import torch.nn as nn
from yolo4 import YoloBody
from utils.utils import *
from yolo_layer import *

# --------------------------------------------#
#   To predict with your own trained model,
#   two parameters must be changed:
#   both model_path and classes_path!
# --------------------------------------------#
class Inference(object):
    # ---------------------------------------------------#
    #   Initialize the model and parameters and load
    #   the trained weights
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.model_path = kwargs['model_path']
        self.anchors_path = kwargs['anchors_path']
        self.classes_path = kwargs['classes_path']
        self.model_image_size = kwargs['model_image_size']
        self.confidence = kwargs['confidence']
        self.cuda = kwargs['cuda']
        self.class_names = self.get_class()   # class_names = ['person', 'bicycle', 'car', ..., 'toothbrush']
        self.anchors = self.get_anchors()     # anchors = [12.0, 16.0, 19.0, 36.0, ..., 401]
        print(self.anchors)

        # .eval() disables BatchNorm updates and Dropout; YoloBody spans the
        # network from the input all the way to the three final conv heads
        self.net = YoloBody(3, len(self.class_names)).eval()
        self.load_model_pth(self.net, self.model_path)

        if self.cuda:
            self.net = self.net.cuda()
            self.net.eval()
        print('Finished!')

        self.yolo_decodes = []
        anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        for i in range(3):
            head = YoloLayer(self.model_image_size, anchor_masks, len(self.class_names),
                             self.anchors, len(self.anchors) // 2).eval()
            self.yolo_decodes.append(head)

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def load_model_pth(self, model, pth):
        print('Loading weights into state dict, name: %s' % (pth))
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_dict = model.state_dict()
        pretrained_dict = torch.load(pth, map_location=device)  # map_location switches between GPU and CPU
        matched_dict = {}

        for k, v in pretrained_dict.items():  # k is the layer name, v its parameters
            if np.shape(model_dict[k]) == np.shape(v):
                matched_dict[k] = v
            else:
                print('un matched layers: %s' % k)
        print(len(model_dict.keys()), len(pretrained_dict.keys()))
        print('%d layers matched,  %d layers missed' % (
            len(matched_dict.keys()), len(model_dict) - len(matched_dict.keys())))
        model_dict.update(matched_dict)     # merge the matched weights into model_dict
        model.load_state_dict(model_dict)   # load the merged dict (not pretrained_dict, which may not match)
        print('Finished!')
        return model

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def get_class(self):
        classes_path = os.path.expanduser(self.classes_path)  # resolve to an absolute path
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   Read all anchor boxes
    # ---------------------------------------------------#
    def get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return anchors
        # return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   Detect objects in an image
    # ---------------------------------------------------#
    def detect_image(self, image_src):
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (608, 608))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))
        images = np.asarray([img])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)  # forward pass through YoloBody; returns the three head outputs

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        print(output.shape)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.1)
        boxes = [box.cpu().numpy() for box in batch_detections]
        print(boxes[0])
        return boxes[0]


if __name__ == '__main__':
    params = {
        "model_path": 'pth/yolo4_weights_my.pth',
        "anchors_path": 'work_dir/yolo_anchors_coco.txt',
        "classes_path": 'work_dir/coco_classes.txt',
        "model_image_size": (608, 608, 3),
        "confidence": 0.4,
        "cuda": True
    }

    model = Inference(**params)
    class_names = load_class_names(params['classes_path'])
    image_src = cv2.imread('dog.jpg')
    boxes = model.detect_image(image_src)
    plot_boxes_cv2(image_src, boxes, savename='output3.jpg', class_names=class_names)
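
To run the same pipeline over a whole folder instead of the single dog.jpg, here is a minimal sketch reusing the Inference class above (the imgs/ input directory and the det_ output prefix are assumptions, not part of the original script):

import os
import cv2

model = Inference(**params)                      # params as defined in the __main__ above
class_names = load_class_names(params['classes_path'])

for name in sorted(os.listdir('imgs')):          # 'imgs/' is an assumed input directory
    if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_src = cv2.imread(os.path.join('imgs', name))
    if image_src is None:                        # unreadable file, skip it
        continue
    boxes = model.detect_image(image_src)        # boxes are normalized to 0~1
    plot_boxes_cv2(image_src, boxes, savename=os.path.join('imgs', 'det_' + name),
                   class_names=class_names)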


dataloader: functions related to training

from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from utils.utils import bbox_iou, merge_bboxes
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
# from nets.yolo_training import Generator
import cv2


class TestDataset(Dataset):
    def __init__(self, lines, image_size):
        super(TestDataset, self).__init__()
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def __len__(self):
        return self.test_batches

    def __getitem__(self, index):
        one_line = self.test_lines[index]
        line = one_line.split()
        image_src = cv2.imread(line[0])
        h, w, _ = image_src.shape
        image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])
        img = np.array(image, dtype=np.float32)
        img = np.transpose(img / 255.0, (2, 0, 1))
        return image_src, img, y, [h, w, line[0]]


class TrainDataset(Dataset):
    def __init__(self, train_lines, image_size, mosaic=True):
        super(TrainDataset, self).__init__()
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.mosaic = mosaic
        self.flag = True

    def __len__(self):
        return self.train_batches

    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random preprocessing for real-time data augmentation."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # Rescale the image
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste the image at a random offset
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h),
                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Flip the image or not
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Color-space (HSV) jitter
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # Adjust the ground-truth boxes to the augmented image
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep only valid boxes
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        if len(box) == 0:
            return image_data, []

        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        h, w = input_shape
        min_offset_x = 0.3
        min_offset_y = 0.3
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line
            line_content = line.split()
            # Open the image
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size
            iw, ih = image.size
            # Ground-truth boxes
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Flip the image or not
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Rescale the input image
            new_ar = w / h
            scale = self.rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Color-space (HSV) jitter
            hue = self.rand(-hue, hue)
            sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
            val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            x[..., 0][x[..., 0] > 1] -= 1
            x[..., 0][x[..., 0] < 0] += 1
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste the image at its slot among the four mosaic tiles
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h),
                                  (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes onto the mosaic canvas
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Cut the four tiles and stitch them together
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Merge the boxes across the cut lines
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def __getitem__(self, index):
        if index == 0:
            shuffle(self.train_lines)
        lines = self.train_lines
        n = self.train_batches
        index = index % n
        if self.mosaic:
            if self.flag and (index + 4) < n:
                img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2])
            else:
                img, y = self.get_random_data(lines[index], self.image_size[0:2])
            self.flag = bool(1 - self.flag)
        else:
            img, y = self.get_random_data(lines[index], self.image_size[0:2])

        if len(y) != 0:
            # Convert pixel coordinates into 0~1 fractions of the image size
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, 0] = boxes[:, 0] / self.image_size[1]
            boxes[:, 1] = boxes[:, 1] / self.image_size[0]
            boxes[:, 2] = boxes[:, 2] / self.image_size[1]
            boxes[:, 3] = boxes[:, 3] / self.image_size[0]

            boxes = np.maximum(np.minimum(boxes, 1), 0)
            # (x1, y1, x2, y2) -> (cx, cy, w, h)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
            boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
            y = np.concatenate([boxes, y[:, -1:]], axis=-1)

        img = np.array(img, dtype=np.float32)
        tmp_inp = np.transpose(img / 255.0, (2, 0, 1))
        tmp_targets = np.array(y, dtype=np.float32)
        return tmp_inp, tmp_targets


# collate_fn used by the DataLoader
def train_dataset_collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = np.array(images)
    bboxes = np.array(bboxes)
    return images, bboxes


def test_dataset_collate(batch):
    srcs = []
    inputs = []
    targets = []
    shapes = []
    for img_src, img, labels, infos in batch:
        srcs.append(img_src)
        inputs.append(img)
        targets.append(labels)
        shapes.append(infos)
    inputs = np.array(inputs, dtype=np.float32)
    return srcs, inputs, targets, shapes
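
The two collate functions above are meant to be plugged into a torch DataLoader. A minimal wiring sketch, assuming an annotation file work_dir/train_annotations.txt with one line per image in the format "path x1,y1,x2,y2,cls ..." (the file name and batch size are assumptions):

import torch
from torch.utils.data import DataLoader

with open('work_dir/train_annotations.txt') as f:    # assumed annotation file
    train_lines = f.readlines()

train_dataset = TrainDataset(train_lines, image_size=(608, 608, 3), mosaic=True)
# shuffle=False because TrainDataset shuffles its own lines when index == 0
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=False,
                          num_workers=0, collate_fn=train_dataset_collate)

for images, targets in train_loader:
    images = torch.from_numpy(images)                # (4, 3, 608, 608) float32 tensor
    print(images.shape, [t.shape for t in targets])
    break                                            # one batch is enough for a smoke test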

generator: functions related to training

import time
from PIL import Image
import numpy as np
import cv2
from random import shuffle
from utils.utils import merge_bboxes


def rand(a=0, b=1):
    return np.random.rand() * (b - a) + a


class TrainGenerator(object):
    def __init__(self, batch_size, train_lines, image_size):
        self.batch_size = batch_size
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
        self.test_time = time.time()

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Random preprocessing for real-time data augmentation."""
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        box = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

        # resize image
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # place image
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # flip image or not
        flip = rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # distort image
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        # correct boxes
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        if len(box) == 0:
            return image_data, []

        if (box_data[:, :4] > 0).any():
            return image_data, box_data
        else:
            return image_data, []

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
        """Random mosaic preprocessing for real-time data augmentation."""
        h, w = input_shape
        min_offset_x = 0.4
        min_offset_y = 0.4
        scale_low = 1 - min(min_offset_x, min_offset_y)
        scale_high = scale_low + 0.2

        image_datas = []
        box_datas = []
        index = 0

        place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
        place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]
        for line in annotation_line:
            # Split each annotation line
            line_content = line.split()
            # Open the image
            image = Image.open(line_content[0])
            image = image.convert("RGB")
            # Image size
            iw, ih = image.size
            # Ground-truth boxes
            box = np.array([np.array(list(map(float, box.split(',')))) for box in line_content[1:]])

            # Flip the image or not
            flip = rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # Rescale the input image
            new_ar = w / h
            scale = rand(scale_low, scale_high)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # Color-space (HSV) jitter
            hue = rand(-hue, hue)
            sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
            val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
            x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
            x[..., 0] += hue * 360
            x[..., 0][x[..., 0] > 1] -= 1
            x[..., 0][x[..., 0] < 0] += 1
            x[..., 1] *= sat
            x[..., 2] *= val
            x[x[:, :, 0] > 360, 0] = 360
            x[:, :, 1:][x[:, :, 1:] > 1] = 1
            x[x < 0] = 0
            image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # numpy array, 0 to 1
            image = Image.fromarray((image * 255).astype(np.uint8))

            # Paste the image at its slot among the four mosaic tiles
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # Re-map the boxes onto the mosaic canvas
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # Cut the four tiles and stitch them together
        cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
        cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        # Merge the boxes across the cut lines
        new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
        if len(new_boxes) == 0:
            return new_image, []
        if (new_boxes[:, :4] > 0).any():
            return new_image, new_boxes
        else:
            return new_image, []

    def generate(self, train=True, mosaic=True):
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines
            inputs = []
            targets = []
            flag = True
            n = len(lines)
            for i in range(len(lines)):
                if mosaic == True:
                    if flag and (i + 4) < n:
                        img, y = self.get_random_data_with_Mosaic(lines[i:i + 4], self.image_size[0:2])
                        i = (i + 4) % n
                    else:
                        img, y = self.get_random_data(lines[i], self.image_size[0:2])
                        i = (i + 1) % n
                    flag = bool(1 - flag)
                else:
                    img, y = self.get_random_data(lines[i], self.image_size[0:2])
                    i = (i + 1) % n

                if len(y) != 0:
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]

                    boxes = np.maximum(np.minimum(boxes, 1), 0)
                    boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                    boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                    boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2
                    boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2
                    y = np.concatenate([boxes, y[:, -1:]], axis=-1)

                img = np.array(img, dtype=np.float32)
                inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
                targets.append(np.array(y, dtype=np.float32))
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    # print('data load use time:', time.time()-self.test_time)
                    # self.test_time = time.time()
                    yield tmp_inp, tmp_targets


class TestGenerator(object):
    def __init__(self, batch_size, lines, image_size):
        self.batch_size = batch_size
        self.test_lines = lines
        self.test_batches = len(lines)
        self.image_size = image_size

    def generate(self):
        lines = self.test_lines
        inputs = []
        targets = []
        shapes = []
        for one_line in lines:
            print(one_line)
            line = one_line.split()
            image_src = cv2.imread(line[0])
            h, w, _ = image_src.shape
            image = cv2.resize(image_src, (self.image_size[1], self.image_size[0]))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            y = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]])

            img = np.array(image, dtype=np.float32)
            inputs.append(np.transpose(img / 255.0, (2, 0, 1)))
            targets.append(y)
            shapes.append([h, w, line[0]])
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = targets
                tmp_shapes = shapes
                inputs = []
                targets = []
                shapes = []
                yield tmp_inp, tmp_targets, tmp_shapes
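
A minimal sketch of consuming TrainGenerator in a training loop (the annotation file path and batch size are assumptions; note that generate() loops forever, so the consumer decides when to stop):

import torch

with open('work_dir/train_annotations.txt') as f:    # assumed annotation file
    train_lines = f.readlines()

gen = TrainGenerator(batch_size=4, train_lines=train_lines,
                     image_size=(608, 608)).generate(train=True, mosaic=True)

for step in range(2):                                # take two batches as a smoke test
    batch_images, batch_targets = next(gen)
    images = torch.from_numpy(batch_images)          # (4, 3, 608, 608)
    targets = [torch.from_numpy(t) for t in batch_targets]
    print(step, images.shape, len(targets))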

utils.py: functions related to forward inference

Box drawing function: plot_boxes_cv2
Class-name loading function: load_class_names
IoU computation functions: bbox_iou1 / bbox_iou
Non-maximum suppression function: non_max_suppression
from __future__ import division
import torch
import numpy as np
import math
import cv2


def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
    img = np.copy(img)
    colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)

    def get_color(c, x, max_val):
        ratio = float(x) / max_val * 5
        i = int(math.floor(ratio))
        j = int(math.ceil(ratio))
        ratio = ratio - i
        r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
        return int(r * 255)

    width = img.shape[1]
    height = img.shape[0]
    for i in range(len(boxes)):
        box = boxes[i]
        x1 = int(box[0] * width)    # fractional position * original width  -> x coordinate in the original image
        y1 = int(box[1] * height)   # fractional position * original height -> y coordinate in the original image
        x2 = int(box[2] * width)
        y2 = int(box[3] * height)

        if color:
            rgb = color
        else:
            rgb = (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            # print('%s: %f' % (class_names[cls_id], cls_conf))
            classes = len(class_names)
            offset = cls_id * 123457 % classes
            red = get_color(2, offset, classes)
            green = get_color(1, offset, classes)
            blue = get_color(0, offset, classes)
            if color is None:
                rgb = (red, green, blue)
            img = cv2.putText(img, class_names[int(cls_id)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 2)
        img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 3)
    if savename:
        print("save plot results to %s" % savename)
        cv2.imwrite(savename, img)
    return img


def load_class_names(namesfile):
    class_names = []
    with open(namesfile, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.rstrip()
        class_names.append(line)
    return class_names


def bbox_iou1(box1, box2, x1y1x2y2=True):
    # print('iou box1:', box1)
    # print('iou box2:', box2)
    if x1y1x2y2:
        mx = min(box1[0], box2[0])
        Mx = max(box1[2], box2[2])
        my = min(box1[1], box2[1])
        My = max(box1[3], box2[3])
        w1 = box1[2] - box1[0]
        h1 = box1[3] - box1[1]
        w2 = box2[2] - box2[0]
        h2 = box2[3] - box2[1]
    else:
        w1 = box1[2]
        h1 = box1[3]
        w2 = box2[2]
        h2 = box2[3]
        mx = min(box1[0], box2[0])
        Mx = max(box1[0] + w1, box2[0] + w2)
        my = min(box1[1], box2[1])
        My = max(box1[1] + h1, box2[1] + h2)
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    if cw <= 0 or ch <= 0:
        return 0.0

    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    uarea = area1 + area2 - carea
    return carea / uarea


def bbox_iou(box1, box2, x1y1x2y2=True):
    """Compute the IoU between two sets of boxes."""
    if not x1y1x2y2:
        # convert (cx, cy, w, h) to corner coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1e-3, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1 + 1e-3, min=0)
    b1_area = (b1_x2 - b1_x1 + 1e-3) * (b1_y2 - b1_y1 + 1e-3)
    b2_area = (b2_x2 - b2_x1 + 1e-3) * (b2_y2 - b2_y1 + 1e-3)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
    return iou


def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    # Convert (cx, cy, w, h) to top-left / bottom-right corners
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # First round of filtering on objectness confidence
        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
        image_pred = image_pred[conf_mask]

        if not image_pred.size(0):
            continue

        # Best class and its confidence
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        # detections holds (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        # Classes present among the detections
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()

        for c in unique_labels:
            # All pre-filtered predictions of this class
            detections_class = detections[detections[:, -1] == c]
            # Sort by objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Keep the highest-confidence box, then drop every remaining box
                # whose overlap with it exceeds nms_thres
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                detections_class = detections_class[1:][ious < nms_thres]
            # Stack the kept detections
            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = max_detections if output[image_i] is None else torch.cat(
                (output[image_i], max_detections))

    return output


def merge_bboxes(bboxes, cutx, cuty):
    merge_bbox = []
    for i in range(len(bboxes)):
        for box in bboxes[i]:
            tmp_box = []
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

            if i == 0:  # top-left tile
                if y1 > cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 1:  # bottom-left tile
                if y2 < cuty or x1 > cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x2 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 2:  # bottom-right tile
                if y2 < cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y1 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx
                    if x2 - x1 < 5:
                        continue

            if i == 3:  # top-right tile
                if y1 > cuty or x2 < cutx:
                    continue
                if y2 >= cuty and y1 <= cuty:
                    y2 = cuty
                    if y2 - y1 < 5:
                        continue
                if x2 >= cutx and x1 <= cutx:
                    x1 = cutx
                    if x2 - x1 < 5:
                        continue

            tmp_box.append(x1)
            tmp_box.append(y1)
            tmp_box.append(x2)
            tmp_box.append(y2)
            tmp_box.append(box[-1])
            merge_bbox.append(tmp_box)
    return merge_bbox
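
A quick sanity check of bbox_iou and non_max_suppression on hand-made tensors (the numbers are purely illustrative):

import torch

a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
b = torch.tensor([[5.0, 5.0, 15.0, 15.0]])
print(bbox_iou(a, b))   # overlap 5x5=25, union 100+100-25=175 -> IoU ~= 0.143

# Two almost-identical high-confidence predictions of the same class in
# (cx, cy, w, h, obj_conf, class0_score) layout; NMS keeps only one of them.
pred = torch.tensor([[[5.0, 5.0, 10.0, 10.0, 0.9, 0.8],
                      [5.5, 5.0, 10.0, 10.0, 0.8, 0.7]]])
kept = non_max_suppression(pred, num_classes=1, conf_thres=0.5, nms_thres=0.4)
print(kept[0])          # a single (x1, y1, x2, y2, obj_conf, class_conf, class_pred) row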
