Reference link:

https://github.com/Tianxiaomo/pytorch-YOLOv4

Training + inference workflow:
1. Download the code and the pretrained model, and prepare the data.
2. Data preprocessing: use the scripts under data_process to process your own annotated data.
3. Train the model (modify train.py / cfg.py / dataset.py and the data-preprocessing definitions under tool; read the code to see what to change).
4. Test the model (modify test.py to run predictions).
5. Convert the PyTorch model to an ONNX model (modify pytorch2onnx.py).
6. Prune the ONNX model (modify dy_resize.py to remove unsupported operators).
7. Convert the ONNX model to an OM model (with the atc command; see the sketch after this list).
8. Test the OM model (modify the pyACL code; the local code does not use aspect-ratio-preserving resizing, so the main change is the preprocessing in acl_dvpp.py).
9. Compare the results (feed the OM model's outputs into test_onnx.py / test_om.py and compare the detections of the local model against the Atlas model).
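Step 7's atc invocation is not shown in this post, so here is a minimal sketch, assuming the exported file is named yolov4.onnx, the input node is called input, and the target chip is an Ascend 310 (adjust --input_shape and --soc_version to your own model and hardware):

atc --model=yolov4.onnx \
    --framework=5 \
    --output=yolov4 \
    --input_format=NCHW \
    --input_shape="input:1,3,416,416" \
    --soc_version=Ascend310

--framework=5 selects ONNX as the source format; the command produces yolov4.om at the given --output path.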

I. Data Processing

For the data processing steps and sample data, see the previous post:

https://blog.csdn.net/gm_Ergou/article/details/118599118


II. Model Training

Only the key modified code is shown here; the rest can be downloaded from the reference link:

https://github.com/Tianxiaomo/pytorch-YOLOv4

Code structure: (screenshot of the repository layout omitted; see the GitHub link above)

1.train.py

# -*- coding: utf-8 -*-
import time
import logging
import os, sys, math
import argparse
from collections import deque
import datetime
import copy
import cv2
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch import optim
from torch.nn import functional as F
from tensorboardX import SummaryWriter
from easydict import EasyDict as edict

from dataset import Yolo_dataset
from cfg import Cfg
from models import Yolov4
from tool.darknet2pytorch import Darknet
from tool.tv_reference.utils import collate_fn as val_collate
from tool.tv_reference.coco_utils import convert_to_coco_api
from tool.tv_reference.coco_eval import CocoEvaluator


def bboxes_iou(bboxes_a, bboxes_b, xyxy=True, GIoU=False, DIoU=False, CIoU=False):
    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
        raise IndexError

    if xyxy:
        # intersection top left
        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
        # intersection bottom right
        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
        # convex (smallest enclosing box) top left and bottom right
        con_tl = torch.min(bboxes_a[:, None, :2], bboxes_b[:, :2])
        con_br = torch.max(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
        # centerpoint distance squared
        rho2 = ((bboxes_a[:, None, 0] + bboxes_a[:, None, 2]) - (bboxes_b[:, 0] + bboxes_b[:, 2])) ** 2 / 4 + (
            (bboxes_a[:, None, 1] + bboxes_a[:, None, 3]) - (bboxes_b[:, 1] + bboxes_b[:, 3])) ** 2 / 4

        w1 = bboxes_a[:, 2] - bboxes_a[:, 0]
        h1 = bboxes_a[:, 3] - bboxes_a[:, 1]
        w2 = bboxes_b[:, 2] - bboxes_b[:, 0]
        h2 = bboxes_b[:, 3] - bboxes_b[:, 1]

        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
    else:
        # intersection top left
        tl = torch.max((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
                       (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
        # intersection bottom right
        br = torch.min((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
                       (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
        # convex (smallest enclosing box) top left and bottom right
        con_tl = torch.min((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
                           (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2))
        con_br = torch.max((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
                           (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2))
        # centerpoint distance squared
        rho2 = ((bboxes_a[:, None, :2] - bboxes_b[:, :2]) ** 2 / 4).sum(dim=-1)

        w1 = bboxes_a[:, 2]
        h1 = bboxes_a[:, 3]
        w2 = bboxes_b[:, 2]
        h2 = bboxes_b[:, 3]

        area_a = torch.prod(bboxes_a[:, 2:], 1)
        area_b = torch.prod(bboxes_b[:, 2:], 1)

    en = (tl < br).type(tl.type()).prod(dim=2)
    area_i = torch.prod(br - tl, 2) * en  # * ((tl < br).all())
    area_u = area_a[:, None] + area_b - area_i
    iou = area_i / area_u

    if GIoU or DIoU or CIoU:
        if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
            area_c = torch.prod(con_br - con_tl, 2)  # convex area
            return iou - (area_c - area_u) / area_c  # GIoU
        if DIoU or CIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            # convex diagonal squared
            c2 = torch.pow(con_br - con_tl, 2).sum(dim=2) + 1e-16
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w1 / h1).unsqueeze(1) - torch.atan(w2 / h2), 2)
                with torch.no_grad():
                    alpha = v / (1 - iou + v)
                return iou - (rho2 / c2 + v * alpha)  # CIoU
    return iou


class Yolo_loss(nn.Module):
    def __init__(self, n_classes=18, n_anchors=3, device=None, batch=2):
        super(Yolo_loss, self).__init__()
        self.device = device
        self.strides = [8, 16, 32]
        image_size = 416
        self.n_classes = n_classes
        self.n_anchors = n_anchors

        # self.anchors = [[30,92], [45,88], [65,72], [76,150], [121,197], [144,109], [149,314], [279,217], [293,385]]
        self.anchors = [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55], [72, 146], [142, 110], [192, 243], [459, 401]]  # coco
        self.anch_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        self.ignore_thre = 0.5

        self.masked_anchors, self.ref_anchors, self.grid_x, self.grid_y, self.anchor_w, self.anchor_h = [], [], [], [], [], []

        for i in range(3):
            all_anchors_grid = [(w / self.strides[i], h / self.strides[i]) for w, h in self.anchors]
            masked_anchors = np.array([all_anchors_grid[j] for j in self.anch_masks[i]], dtype=np.float32)
            ref_anchors = np.zeros((len(all_anchors_grid), 4), dtype=np.float32)
            ref_anchors[:, 2:] = np.array(all_anchors_grid, dtype=np.float32)
            ref_anchors = torch.from_numpy(ref_anchors)
            # calculate pred - xywh obj cls
            fsize = image_size // self.strides[i]
            grid_x = torch.arange(fsize, dtype=torch.float).repeat(batch, 3, fsize, 1).to(device)
            grid_y = torch.arange(fsize, dtype=torch.float).repeat(batch, 3, fsize, 1).permute(0, 1, 3, 2).to(device)
            anchor_w = torch.from_numpy(masked_anchors[:, 0]).repeat(batch, fsize, fsize, 1).permute(0, 3, 1, 2).to(device)
            anchor_h = torch.from_numpy(masked_anchors[:, 1]).repeat(batch, fsize, fsize, 1).permute(0, 3, 1, 2).to(device)

            self.masked_anchors.append(masked_anchors)
            self.ref_anchors.append(ref_anchors)
            self.grid_x.append(grid_x)
            self.grid_y.append(grid_y)
            self.anchor_w.append(anchor_w)
            self.anchor_h.append(anchor_h)

    def build_target(self, pred, labels, batchsize, fsize, n_ch, output_id):
        # target assignment
        tgt_mask = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 4 + self.n_classes).to(device=self.device)
        obj_mask = torch.ones(batchsize, self.n_anchors, fsize, fsize).to(device=self.device)
        tgt_scale = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 2).to(self.device)
        target = torch.zeros(batchsize, self.n_anchors, fsize, fsize, n_ch).to(self.device)

        # labels = labels.cpu().data
        nlabel = (labels.sum(dim=2) > 0).sum(dim=1)  # number of objects
        truth_x_all = (labels[:, :, 2] + labels[:, :, 0]) / (self.strides[output_id] * 2)
        truth_y_all = (labels[:, :, 3] + labels[:, :, 1]) / (self.strides[output_id] * 2)
        truth_w_all = (labels[:, :, 2] - labels[:, :, 0]) / self.strides[output_id]
        truth_h_all = (labels[:, :, 3] - labels[:, :, 1]) / self.strides[output_id]
        truth_i_all = truth_x_all.to(torch.int16).cpu().numpy()
        truth_j_all = truth_y_all.to(torch.int16).cpu().numpy()

        for b in range(batchsize):
            n = int(nlabel[b])
            if n == 0:
                continue
            truth_box = torch.zeros(n, 4).to(self.device)
            truth_box[:n, 2] = truth_w_all[b, :n]
            truth_box[:n, 3] = truth_h_all[b, :n]
            truth_i = truth_i_all[b, :n]
            truth_j = truth_j_all[b, :n]

            # calculate iou between truth and reference anchors
            anchor_ious_all = bboxes_iou(truth_box.cpu(), self.ref_anchors[output_id], CIoU=True)

            # temp = bbox_iou(truth_box.cpu(), self.ref_anchors[output_id])

            best_n_all = anchor_ious_all.argmax(dim=1)
            best_n = best_n_all % 3
            best_n_mask = ((best_n_all == self.anch_masks[output_id][0]) |
                           (best_n_all == self.anch_masks[output_id][1]) |
                           (best_n_all == self.anch_masks[output_id][2]))
            if sum(best_n_mask) == 0:
                continue

            truth_box[:n, 0] = truth_x_all[b, :n]
            truth_box[:n, 1] = truth_y_all[b, :n]

            pred_ious = bboxes_iou(pred[b].view(-1, 4), truth_box, xyxy=False)
            pred_best_iou, _ = pred_ious.max(dim=1)
            pred_best_iou = (pred_best_iou > self.ignore_thre)
            pred_best_iou = pred_best_iou.view(pred[b].shape[:3])
            # set mask to zero (ignore) if pred matches truth
            obj_mask[b] = ~ pred_best_iou

            for ti in range(best_n.shape[0]):
                if best_n_mask[ti] == 1:
                    i, j = truth_i[ti], truth_j[ti]
                    a = best_n[ti]
                    obj_mask[b, a, j, i] = 1
                    tgt_mask[b, a, j, i, :] = 1
                    target[b, a, j, i, 0] = truth_x_all[b, ti] - truth_x_all[b, ti].to(torch.int16).to(torch.float)
                    target[b, a, j, i, 1] = truth_y_all[b, ti] - truth_y_all[b, ti].to(torch.int16).to(torch.float)
                    target[b, a, j, i, 2] = torch.log(
                        truth_w_all[b, ti] / torch.Tensor(self.masked_anchors[output_id])[best_n[ti], 0] + 1e-16)
                    target[b, a, j, i, 3] = torch.log(
                        truth_h_all[b, ti] / torch.Tensor(self.masked_anchors[output_id])[best_n[ti], 1] + 1e-16)
                    target[b, a, j, i, 4] = 1
                    target[b, a, j, i, 5 + labels[b, ti, 4].to(torch.int16).cpu().numpy()] = 1
                    tgt_scale[b, a, j, i, :] = torch.sqrt(2 - truth_w_all[b, ti] * truth_h_all[b, ti] / fsize / fsize)
        return obj_mask, tgt_mask, tgt_scale, target

    def forward(self, xin, labels=None):
        loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = 0, 0, 0, 0, 0, 0
        for output_id, output in enumerate(xin):
            batchsize = output.shape[0]
            fsize = output.shape[2]
            n_ch = 5 + self.n_classes

            output = output.view(batchsize, self.n_anchors, n_ch, fsize, fsize)
            output = output.permute(0, 1, 3, 4, 2)  # .contiguous()

            # logistic activation for xy, obj, cls
            output[..., np.r_[:2, 4:n_ch]] = torch.sigmoid(output[..., np.r_[:2, 4:n_ch]])

            pred = output[..., :4].clone()
            pred[..., 0] += self.grid_x[output_id]
            pred[..., 1] += self.grid_y[output_id]
            pred[..., 2] = torch.exp(pred[..., 2]) * self.anchor_w[output_id].contiguous()
            pred[..., 3] = torch.exp(pred[..., 3]) * self.anchor_h[output_id].contiguous()

            obj_mask, tgt_mask, tgt_scale, target = self.build_target(pred, labels, batchsize, fsize, n_ch, output_id)

            # loss calculation
            output[..., 4] *= obj_mask
            output[..., np.r_[0:4, 5:n_ch]] *= tgt_mask
            output[..., 2:4] *= tgt_scale

            target[..., 4] *= obj_mask
            target[..., np.r_[0:4, 5:n_ch]] *= tgt_mask
            target[..., 2:4] *= tgt_scale

            loss_xy += F.binary_cross_entropy(input=output[..., :2], target=target[..., :2],
                                              weight=tgt_scale * tgt_scale, reduction='sum')
            loss_wh += F.mse_loss(input=output[..., 2:4], target=target[..., 2:4], reduction='sum') / 2
            loss_obj += F.binary_cross_entropy(input=output[..., 4], target=target[..., 4], reduction='sum')
            loss_cls += F.binary_cross_entropy(input=output[..., 5:], target=target[..., 5:], reduction='sum')
            loss_l2 += F.mse_loss(input=output, target=target, reduction='sum')

        loss = loss_xy + loss_wh + loss_obj + loss_cls

        return loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2


def collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append([img])
        bboxes.append([box])
    images = np.concatenate(images, axis=0)
    images = images.transpose(0, 3, 1, 2)
    images = torch.from_numpy(images).div(255.0)
    bboxes = np.concatenate(bboxes, axis=0)
    bboxes = torch.from_numpy(bboxes)
    return images, bboxes


def train(model, device, config, epochs=5, batch_size=1, save_cp=True, log_step=20, img_scale=0.5):
    train_dataset = Yolo_dataset(config.train_label, config, train=True)
    val_dataset = Yolo_dataset(config.val_label, config, train=False)

    n_train = len(train_dataset)
    n_val = len(val_dataset)

    train_loader = DataLoader(train_dataset, batch_size=config.batch // config.subdivisions, shuffle=True,
                              num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate)

    val_loader = DataLoader(val_dataset, batch_size=config.batch // config.subdivisions, shuffle=True,
                            num_workers=8, pin_memory=True, drop_last=True, collate_fn=val_collate)

    writer = SummaryWriter(log_dir=config.TRAIN_TENSORBOARD_DIR,
                           filename_suffix=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}',
                           comment=f'OPT_{config.TRAIN_OPTIMIZER}_LR_{config.learning_rate}_BS_{config.batch}_Sub_{config.subdivisions}_Size_{config.width}')

    max_itr = config.TRAIN_EPOCHS * n_train
    global_step = 0

    # learning rate setup
    def burnin_schedule(i):
        if i < config.burn_in:
            factor = pow(i / config.burn_in, 4)
        elif i < config.steps[0]:
            factor = 1.0
        elif i < config.steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    if config.TRAIN_OPTIMIZER.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate / config.batch,
                               betas=(0.9, 0.999),
                               eps=1e-08)
    elif config.TRAIN_OPTIMIZER.lower() == 'sgd':
        optimizer = optim.SGD(params=model.parameters(),
                              lr=config.learning_rate / config.batch,
                              momentum=config.momentum,
                              weight_decay=config.decay)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', verbose=False, patience=6, min_lr=1e-7)
    # scheduler = CosineAnnealingWarmRestarts(optimizer, 0.001, 1e-6, 20)

    # loss
    criterion = Yolo_loss(device=device, batch=config.batch // config.subdivisions, n_classes=config.classes)

    # train
    saved_models = deque()
    model.train()
    for epoch in range(epochs + 1):
        # model.train()
        epoch_loss = 0
        epoch_step = 0

        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}', unit='img', ncols=50) as pbar:
            for i, batch in enumerate(train_loader):
                global_step += 1
                epoch_step += 1
                images = batch[0]
                bboxes = batch[1]

                images = images.to(device=device, dtype=torch.float32)
                bboxes = bboxes.to(device=device)

                bboxes_pred = model(images)
                # loss
                loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = criterion(bboxes_pred, bboxes)
                # loss = loss / config.subdivisions
                loss.backward()

                epoch_loss += loss.item()

                if global_step % config.subdivisions == 0:
                    optimizer.step()
                    # scheduler.step()
                    scheduler.step(loss)
                    model.zero_grad()

                # log
                if global_step % (log_step * config.subdivisions) == 0:
                    writer.add_scalar('train/Loss', loss.item(), global_step)
                    writer.add_scalar('train/loss_xy', loss_xy.item(), global_step)
                    writer.add_scalar('train/loss_wh', loss_wh.item(), global_step)
                    writer.add_scalar('train/loss_obj', loss_obj.item(), global_step)
                    writer.add_scalar('train/loss_cls', loss_cls.item(), global_step)
                    writer.add_scalar('train/loss_l2', loss_l2.item(), global_step)
                    # writer.add_scalar('lr', scheduler.get_lr()[0] * config.batch, global_step)
                    pbar.set_postfix(**{'loss (batch)': loss.item(),
                                        'loss_xy': loss_xy.item(),
                                        'loss_wh': loss_wh.item(),
                                        'loss_obj': loss_obj.item(),
                                        'loss_cls': loss_cls.item(),
                                        'loss_l2': loss_l2.item(),
                                        # 'lr': scheduler.get_lr()[0] * config.batch
                                        })
                    logging.debug('Train step_{}: loss : {},loss xy : {},loss wh : {},'
                                  'loss obj : {},loss cls : {},loss l2 : {},lr : {}'
                                  .format(global_step, loss.item(), loss_xy.item(),
                                          loss_wh.item(), loss_obj.item(),
                                          loss_cls.item(), loss_l2.item(),
                                          0.0001
                                          # scheduler.get_lr()[0] * config.batch
                                          ))

                pbar.update(images.shape[0])

        if epoch % 10 == 0:
            if cfg.use_darknet_cfg:
                eval_model = Darknet(cfg.cfgfile, inference=True)
            else:
                eval_model = Yolov4(cfg.pretrained, n_classes=cfg.classes, inference=True)
            if torch.cuda.device_count() > 1:
                eval_model.load_state_dict(model.module.state_dict())
            else:
                eval_model.load_state_dict(model.state_dict())
            eval_model.to(device)
            evaluator = evaluate(eval_model, val_loader, config, device)
            del eval_model

            stats = evaluator.coco_eval['bbox'].stats
            writer.add_scalar('train/AP', stats[0], global_step)
            writer.add_scalar('train/AP50', stats[1], global_step)
            writer.add_scalar('train/AP75', stats[2], global_step)
            writer.add_scalar('train/AP_small', stats[3], global_step)
            writer.add_scalar('train/AP_medium', stats[4], global_step)
            writer.add_scalar('train/AP_large', stats[5], global_step)
            writer.add_scalar('train/AR1', stats[6], global_step)
            writer.add_scalar('train/AR10', stats[7], global_step)
            writer.add_scalar('train/AR100', stats[8], global_step)
            writer.add_scalar('train/AR_small', stats[9], global_step)
            writer.add_scalar('train/AR_medium', stats[10], global_step)
            writer.add_scalar('train/AR_large', stats[11], global_step)

        if epoch % 20 == 0:
            if not os.path.exists(config.save_path):
                os.makedirs(config.save_path)
            if not os.path.exists(config.save_path2):
                os.makedirs(config.save_path2)
            model_name = f'yolov4_{epoch}.pth'
            torch.save(model.state_dict(), config.save_path + model_name)
            torch.save(model, config.save_path2 + model_name)
            logging.info(f'yolov4_{epoch} saved !')

    writer.close()


@torch.no_grad()
def evaluate(model, data_loader, cfg, device, logger=None, **kwargs):
    """ finished, tested
    """
    # cpu_device = torch.device("cpu")
    model.eval()
    # header = 'Test:'

    coco = convert_to_coco_api(data_loader.dataset, bbox_fmt='coco')
    coco_evaluator = CocoEvaluator(coco, iou_types=["bbox"], bbox_fmt='coco')

    for images, targets in data_loader:
        model_input = [[cv2.resize(img, (cfg.w, cfg.h))] for img in images]
        model_input = np.concatenate(model_input, axis=0)
        model_input = model_input.transpose(0, 3, 1, 2)
        model_input = torch.from_numpy(model_input).div(255.0)
        model_input = model_input.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(model_input)

        # outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        # outputs = outputs.cpu().detach().numpy()
        res = {}
        # for img, target, output in zip(images, targets, outputs):
        for img, target, boxes, confs in zip(images, targets, outputs[0], outputs[1]):
            img_height, img_width = img.shape[:2]
            # boxes = output[...,:4].copy()  # output boxes in yolo format
            boxes = boxes.squeeze(2).cpu().detach().numpy()
            boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]  # Transform [x1, y1, x2, y2] to [x1, y1, w, h]
            boxes[..., 0] = boxes[..., 0] * img_width
            boxes[..., 1] = boxes[..., 1] * img_height
            boxes[..., 2] = boxes[..., 2] * img_width
            boxes[..., 3] = boxes[..., 3] * img_height
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            # confs = output[...,4:].copy()
            confs = confs.cpu().detach().numpy()
            labels = np.argmax(confs, axis=1).flatten()
            labels = torch.as_tensor(labels, dtype=torch.int64)
            scores = np.max(confs, axis=1).flatten()
            scores = torch.as_tensor(scores, dtype=torch.float32)
            res[target["image_id"].item()] = {
                "boxes": boxes,
                "scores": scores,
                "labels": labels,
            }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time

    # gather the stats from all processes
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    return coco_evaluator


def init_logger(log_file=None, log_dir=None, log_level=logging.INFO, mode='w', stdout=True):
    """
    log_dir: folder path for the log file
    mode: 'a', append; 'w', overwrite the existing file.
    """
    def get_date_str():
        now = datetime.datetime.now()
        return now.strftime('%Y-%m-%d_%H-%M-%S')

    fmt = '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s: %(message)s'
    if log_dir is None:
        log_dir = '~/log/'
    if log_file is None:
        log_file = 'log_' + get_date_str() + '.txt'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_file = os.path.join(log_dir, log_file)
    # logging must not be used for output here
    print('log file path:' + log_file)

    logging.basicConfig(level=logging.DEBUG,
                        format=fmt,
                        filename=log_file,
                        filemode=mode)

    if stdout:
        console = logging.StreamHandler(stream=sys.stdout)
        console.setLevel(log_level)
        formatter = logging.Formatter(fmt)
        console.setFormatter(formatter)
        logging.getLogger('').addHandler(console)

    return logging


def _get_date_str():
    now = datetime.datetime.now()
    return now.strftime('%Y-%m-%d_%H-%M')


def get_args(**kwargs):
    cfg = kwargs
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-l', '--learning-rate', metavar='LR', type=float, nargs='?', default=0.001,
                        dest='learning_rate')
    parser.add_argument('-f', '--load', dest='load', type=str, default='data/model/yolov4.pth',
                        help='Load model from a .pth file')
    parser.add_argument('-g', '--gpu', metavar='G', type=str, default='-1',
                        help='GPU', dest='gpu')
    parser.add_argument('-dir', '--data-dir', type=str, default='',
                        help='dataset dir', dest='dataset_dir')
    parser.add_argument('-pretrained', type=str, default='data/model/yolov4.conv.137.pth',
                        help='pretrained yolov4.conv.137')
    parser.add_argument('-classes', type=int, default=3, help='dataset classes')
    parser.add_argument('-train_label_path', dest='train_label', type=str, default='data/dataset/coins/train.txt')
    parser.add_argument('-val_label_path', dest='val_label', type=str, default='data/dataset/coins/train.txt')
    parser.add_argument('-optimizer', type=str, default='adam', help='training optimizer', dest='TRAIN_OPTIMIZER')
    parser.add_argument('-iou-type', type=str, default='iou', help='iou type (iou, giou, diou, ciou)', dest='iou_type')
    parser.add_argument('-TRAIN_EPOCHS', type=int, default=200)
    args = vars(parser.parse_args())

    # for k in args.keys():
    #     cfg[k] = args.get(k)
    cfg.update(args)

    return edict(cfg)


if __name__ == "__main__":
    cfg = get_args(**Cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    logging = init_logger(log_dir='data/log')

    if cfg.use_darknet_cfg:
        model = Darknet(cfg.cfgfile)
    else:
        model = Yolov4(cfg.pretrained, n_classes=cfg.classes)
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device=device)

    # for param in model.backbone.parameters():
    #     param.requires_grad = True

    train(model=model, config=cfg, epochs=cfg.TRAIN_EPOCHS, device=device)
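For reference, a typical launch with the argparse flags defined above might look like the following (the GPU id and paths are illustrative):

python train.py -g 0 \
    -dir data/dataset \
    -classes 3 \
    -train_label_path data/dataset/coins/train.txt \
    -val_label_path data/dataset/coins/train.txt \
    -pretrained data/model/yolov4.conv.137.pth \
    -TRAIN_EPOCHS 200

Checkpoints are written as yolov4_{epoch}.pth every 20 epochs, a state_dict into cfg.save_path and the whole pickled model into cfg.save_path2.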

2.test.py

import torch
from torch import nn
import torch.nn.functional as F
from tool.torch_utils import *
from tool.yolo_layer import YoloLayer
import sys
import cv2


class Mish(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x * (torch.tanh(torch.nn.functional.softplus(x)))
        return x


class Upsample(nn.Module):
    def __init__(self):
        super(Upsample, self).__init__()

    def forward(self, x, target_size, inference=False):
        assert (x.data.dim() == 4)
        # _, _, tH, tW = target_size

        if inference:
            # B = x.data.size(0)
            # C = x.data.size(1)
            # H = x.data.size(2)
            # W = x.data.size(3)
            return x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1). \
                expand(x.size(0), x.size(1), x.size(2), target_size[2] // x.size(2),
                       x.size(3), target_size[3] // x.size(3)). \
                contiguous().view(x.size(0), x.size(1), target_size[2], target_size[3])
        else:
            return F.interpolate(x, size=(target_size[2], target_size[3]), mode='nearest')


class Conv_Bn_Activation(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, activation, bn=True, bias=False):
        super().__init__()
        pad = (kernel_size - 1) // 2

        self.conv = nn.ModuleList()
        if bias:
            self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad))
        else:
            self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False))
        if bn:
            self.conv.append(nn.BatchNorm2d(out_channels))
        if activation == "mish":
            self.conv.append(Mish())
        elif activation == "relu":
            self.conv.append(nn.ReLU(inplace=True))
        elif activation == "leaky":
            self.conv.append(nn.LeakyReLU(0.1, inplace=True))
        elif activation == "linear":
            pass
        else:
            print("activate error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
                                                       sys._getframe().f_code.co_name, sys._getframe().f_lineno))

    def forward(self, x):
        for l in self.conv:
            x = l(x)
        return x


class ResBlock(nn.Module):
    """
    Sequential residual blocks each of which consists of
    two convolution layers.
    Args:
        ch (int): number of input and output channels.
        nblocks (int): number of residual blocks.
        shortcut (bool): if True, residual tensor addition is enabled.
    """

    def __init__(self, ch, nblocks=1, shortcut=True):
        super().__init__()
        self.shortcut = shortcut
        self.module_list = nn.ModuleList()
        for i in range(nblocks):
            resblock_one = nn.ModuleList()
            resblock_one.append(Conv_Bn_Activation(ch, ch, 1, 1, 'mish'))
            resblock_one.append(Conv_Bn_Activation(ch, ch, 3, 1, 'mish'))
            self.module_list.append(resblock_one)

    def forward(self, x):
        for module in self.module_list:
            h = x
            for res in module:
                h = res(h)
            x = x + h if self.shortcut else h
        return x


class DownSample1(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(3, 32, 3, 1, 'mish')

        self.conv2 = Conv_Bn_Activation(32, 64, 3, 2, 'mish')
        self.conv3 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # [route]
        # layers = -2
        self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')

        self.conv5 = Conv_Bn_Activation(64, 32, 1, 1, 'mish')
        self.conv6 = Conv_Bn_Activation(32, 64, 3, 1, 'mish')
        # [shortcut]
        # from=-3
        # activation = linear
        self.conv7 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # [route]
        # layers = -1, -7
        self.conv8 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # route -2
        x4 = self.conv4(x2)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        # shortcut -3
        x6 = x6 + x4

        x7 = self.conv7(x6)
        # [route]
        # layers = -1, -7
        x7 = torch.cat([x7, x3], dim=1)
        x8 = self.conv8(x7)
        return x8


class DownSample2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(64, 128, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
        # r -2
        self.conv3 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')

        self.resblock = ResBlock(ch=64, nblocks=2)

        # s -3
        self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # r -1 -10
        self.conv5 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5


class DownSample3(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(128, 256, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')

        self.resblock = ResBlock(ch=128, nblocks=8)
        self.conv4 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5


class DownSample4(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(256, 512, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')

        self.resblock = ResBlock(ch=256, nblocks=8)
        self.conv4 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5


class DownSample5(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(512, 1024, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')

        self.resblock = ResBlock(ch=512, nblocks=4)
        self.conv4 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(1024, 1024, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5


class Neck(nn.Module):
    def __init__(self, inference=False):
        super().__init__()
        self.inference = inference

        self.conv1 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv2 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        # SPP
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)

        # R -1 -3 -5 -6
        # SPP
        self.conv4 = Conv_Bn_Activation(2048, 512, 1, 1, 'leaky')
        self.conv5 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv6 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv7 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        # UP
        self.upsample1 = Upsample()
        # R 85
        self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        # R -1 -3
        self.conv9 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv10 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv11 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv12 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv13 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv14 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        # UP
        self.upsample2 = Upsample()
        # R 54
        self.conv15 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        # R -1 -3
        self.conv16 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        self.conv17 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv18 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        self.conv19 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv20 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')

    def forward(self, input, downsample4, downsample3, inference=False):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # SPP
        m1 = self.maxpool1(x3)
        m2 = self.maxpool2(x3)
        m3 = self.maxpool3(x3)
        spp = torch.cat([m3, m2, m1, x3], dim=1)
        # SPP end
        x4 = self.conv4(spp)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        # UP
        up = self.upsample1(x7, downsample4.size(), self.inference)
        # R 85
        x8 = self.conv8(downsample4)
        # R -1 -3
        x8 = torch.cat([x8, up], dim=1)

        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        x11 = self.conv11(x10)
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)

        # UP
        up = self.upsample2(x14, downsample3.size(), self.inference)
        # R 54
        x15 = self.conv15(downsample3)
        # R -1 -3
        x15 = torch.cat([x15, up], dim=1)

        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)
        x19 = self.conv19(x18)
        x20 = self.conv20(x19)
        return x20, x13, x6


class Yolov4Head(nn.Module):
    def __init__(self, output_ch, n_classes, inference=False):
        super().__init__()
        self.inference = inference

        self.conv1 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv2 = Conv_Bn_Activation(256, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo1 = YoloLayer(anchor_mask=[0, 1, 2], num_classes=n_classes,
                               anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                               num_anchors=9, stride=8)

        # R -4
        self.conv3 = Conv_Bn_Activation(128, 256, 3, 2, 'leaky')

        # R -1 -16
        self.conv4 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv5 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv6 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv7 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv9 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv10 = Conv_Bn_Activation(512, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo2 = YoloLayer(anchor_mask=[3, 4, 5], num_classes=n_classes,
                               anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                               num_anchors=9, stride=16)

        # R -4
        self.conv11 = Conv_Bn_Activation(256, 512, 3, 2, 'leaky')

        # R -1 -37
        self.conv12 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv13 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv14 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv15 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv16 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv17 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv18 = Conv_Bn_Activation(1024, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo3 = YoloLayer(anchor_mask=[6, 7, 8], num_classes=n_classes,
                               anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                               num_anchors=9, stride=32)

    def forward(self, input1, input2, input3):
        x1 = self.conv1(input1)
        x2 = self.conv2(x1)

        x3 = self.conv3(input1)
        # R -1 -16
        x3 = torch.cat([x3, input2], dim=1)
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        x8 = self.conv8(x7)
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)

        # R -4
        x11 = self.conv11(x8)
        # R -1 -37
        x11 = torch.cat([x11, input3], dim=1)

        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        x15 = self.conv15(x14)
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)

        if self.inference:
            y1 = self.yolo1(x2)
            y2 = self.yolo2(x10)
            y3 = self.yolo3(x18)
            return get_region_boxes([y1, y2, y3])
        else:
            return [x2, x10, x18]


class Yolov4(nn.Module):
    def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
        super().__init__()

        output_ch = (4 + 1 + n_classes) * 3

        # backbone
        self.down1 = DownSample1()
        self.down2 = DownSample2()
        self.down3 = DownSample3()
        self.down4 = DownSample4()
        self.down5 = DownSample5()
        # neck
        self.neek = Neck(inference)
        # yolov4conv137
        if yolov4conv137weight:
            _model = nn.Sequential(self.down1, self.down2, self.down3, self.down4, self.down5, self.neek)
            pretrained_dict = torch.load(yolov4conv137weight)

            model_dict = _model.state_dict()
            # 1. filter out unnecessary keys
            pretrained_dict = {k1: v for (k, v), k1 in zip(pretrained_dict.items(), model_dict)}
            # 2. overwrite entries in the existing state dict
            model_dict.update(pretrained_dict)
            _model.load_state_dict(model_dict)

        # head
        self.head = Yolov4Head(output_ch, n_classes, inference)

    def forward(self, input):
        d1 = self.down1(input)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)

        x20, x13, x6 = self.neek(d5, d4, d3)

        output = self.head(x20, x13, x6)
        return output


if __name__ == "__main__":
    from tool.utils import load_class_names, plot_boxes_cv2
    from tool.torch_utils import do_detect

    # parameters
    width = 416
    height = 416
    n_classes = 3
    imgfile = "data/test/test.jpg"
    weightfile = "data/model1/yolov4_200.pth"
    namesfile = "data/dataset/coins.names"
    # weightfile = 'data/model/yolov4.pth'
    # namesfile = "data/dataset/coco.names"

    model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
    pretrained_dict = torch.load(weightfile, map_location=torch.device('cpu'))
    model.load_state_dict(pretrained_dict)

    img = cv2.imread(imgfile)
    input_img = cv2.resize(img, (width, height))
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)

    boxes = do_detect(model, input_img, 0.4, 0.6, use_cuda=False)

    if namesfile == None:
        if n_classes == 20:
            namesfile = 'data/voc.names'
        elif n_classes == 80:
            namesfile = 'data/coco.names'
        else:
            print("please give namefile")
    class_names = load_class_names(namesfile)
    plot_boxes_cv2(img, boxes[0], 'result/predictions.jpg', class_names)
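Step 8 of the workflow notes that this local pipeline resizes without preserving the aspect ratio (cv2.resize straight to 416x416 above), while the DVPP preprocessing on the Atlas side needs an equal-ratio resize. Below is a minimal letterbox sketch for aligning the two; letterbox is a hypothetical helper name, not part of the repo:

import cv2
import numpy as np


def letterbox(img, dst_w, dst_h, pad_value=128):
    """Resize while preserving aspect ratio, padding the remainder (hypothetical helper)."""
    h, w = img.shape[:2]
    scale = min(dst_w / w, dst_h / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(img, (new_w, new_h))
    canvas = np.full((dst_h, dst_w, 3), pad_value, dtype=img.dtype)
    top, left = (dst_h - new_h) // 2, (dst_w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    return canvas, scale, left, top  # keep scale/offsets to map boxes back

The same scale and offsets must be applied in reverse to the predicted boxes before comparing them against the non-letterboxed PyTorch results.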

3.cfg.py

# -*- coding: utf-8 -*-
import os
from easydict import EasyDict

Cfg = EasyDict()

Cfg.use_darknet_cfg = False
Cfg.cfgfile = 'cfg/yolov4.cfg'

Cfg.width = 416
Cfg.height = 416
Cfg.classes = 3
Cfg.TRAIN_EPOCHS = 200
Cfg.train_label = 'data/dataset/coins/train.txt'
Cfg.val_label = 'data/dataset/coins/train.txt'
Cfg.TRAIN_OPTIMIZER = 'adam'
Cfg.save_path = 'data/model1/'
Cfg.save_path2 = 'data/model2/'
Cfg.TRAIN_TENSORBOARD_DIR = 'data/log'

Cfg.batch = 1
Cfg.subdivisions = 1
Cfg.channels = 3
Cfg.momentum = 0.949
Cfg.decay = 0.0005
Cfg.angle = 0
Cfg.saturation = 1.5
Cfg.exposure = 1.5
Cfg.hue = .1

Cfg.learning_rate = 0.00261
Cfg.burn_in = 1000
Cfg.max_batches = 500500
Cfg.steps = [400000, 450000]
Cfg.policy = Cfg.steps
Cfg.scales = .1, .1

Cfg.cutmix = 0
Cfg.mosaic = 1

Cfg.letter_box = 0
Cfg.jitter = 0.2
Cfg.track = 0
Cfg.w = Cfg.width
Cfg.h = Cfg.height
Cfg.flip = 1
Cfg.blur = 0
Cfg.gaussian = 0
Cfg.boxes = 60  # box num
Cfg.iou_type = 'iou'  # 'giou', 'diou', 'ciou'

if Cfg.mosaic and Cfg.cutmix:
    Cfg.mixup = 4
elif Cfg.cutmix:
    Cfg.mixup = 2
elif Cfg.mosaic:
    Cfg.mixup = 3

Cfg.keep_checkpoint_max = 10
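The learning-rate values above feed the burnin_schedule defined in train.py. A quick plain-Python sanity check of the resulting schedule, with values copied from this cfg.py:

burn_in, steps = 1000, [400000, 450000]  # from cfg.py above
learning_rate, batch = 0.00261, 1

def burnin_schedule(i):
    # same piecewise factor as in train.py
    if i < burn_in:
        return pow(i / burn_in, 4)
    elif i < steps[0]:
        return 1.0
    elif i < steps[1]:
        return 0.1
    return 0.01

base_lr = learning_rate / batch  # train.py divides the lr by cfg.batch
for step in (100, 1000, 400000, 450000):
    print(step, base_lr * burnin_schedule(step))

So the rate ramps up quartically over the first 1000 steps, holds at 0.00261, then decays by 10x at each of the two milestones.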

4.dataset.py

# -*- coding: utf-8 -*-
'''
@Time          : 2020/05/06 21:09
@Author        : Tianxiaomo
@File          : dataset.py
@Noice         :
@Modificattion :@Author    :@Time      :@Detail    :'''
import os
import random
import sys

import cv2
import numpy as np
import torch
from torch.utils.data.dataset import Dataset


def rand_uniform_strong(min, max):
    if min > max:
        swap = min
        min = max
        max = swap
    return random.random() * (max - min) + min


def rand_scale(s):
    scale = rand_uniform_strong(1, s)
    if random.randint(0, 1) % 2:
        return scale
    return 1. / scale


def rand_precalc_random(min, max, random_part):
    if max < min:
        swap = min
        min = max
        max = swap
    return (random_part * (max - min)) + min


def fill_truth_detection(bboxes, num_boxes, classes, flip, dx, dy, sx, sy, net_w, net_h):
    if bboxes.shape[0] == 0:
        return bboxes, 10000
    np.random.shuffle(bboxes)
    bboxes[:, 0] -= dx
    bboxes[:, 2] -= dx
    bboxes[:, 1] -= dy
    bboxes[:, 3] -= dy

    bboxes[:, 0] = np.clip(bboxes[:, 0], 0, sx)
    bboxes[:, 2] = np.clip(bboxes[:, 2], 0, sx)

    bboxes[:, 1] = np.clip(bboxes[:, 1], 0, sy)
    bboxes[:, 3] = np.clip(bboxes[:, 3], 0, sy)

    out_box = list(np.where(((bboxes[:, 1] == sy) & (bboxes[:, 3] == sy)) |
                            ((bboxes[:, 0] == sx) & (bboxes[:, 2] == sx)) |
                            ((bboxes[:, 1] == 0) & (bboxes[:, 3] == 0)) |
                            ((bboxes[:, 0] == 0) & (bboxes[:, 2] == 0)))[0])
    list_box = list(range(bboxes.shape[0]))
    for i in out_box:
        list_box.remove(i)
    bboxes = bboxes[list_box]

    if bboxes.shape[0] == 0:
        return bboxes, 10000

    bboxes = bboxes[np.where((bboxes[:, 4] < classes) & (bboxes[:, 4] >= 0))[0]]

    if bboxes.shape[0] > num_boxes:
        bboxes = bboxes[:num_boxes]

    min_w_h = np.array([bboxes[:, 2] - bboxes[:, 0], bboxes[:, 3] - bboxes[:, 1]]).min()

    bboxes[:, 0] *= (net_w / sx)
    bboxes[:, 2] *= (net_w / sx)
    bboxes[:, 1] *= (net_h / sy)
    bboxes[:, 3] *= (net_h / sy)

    if flip:
        temp = net_w - bboxes[:, 0]
        bboxes[:, 0] = net_w - bboxes[:, 2]
        bboxes[:, 2] = temp

    return bboxes, min_w_h


def rect_intersection(a, b):
    minx = max(a[0], b[0])
    miny = max(a[1], b[1])
    maxx = min(a[2], b[2])
    maxy = min(a[3], b[3])
    return [minx, miny, maxx, maxy]


def image_data_augmentation(mat, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, gaussian_noise, blur,
                            truth):
    try:
        img = mat
        oh, ow, _ = img.shape
        pleft, ptop, swidth, sheight = int(pleft), int(ptop), int(swidth), int(sheight)
        # crop
        src_rect = [pleft, ptop, swidth + pleft, sheight + ptop]  # x1,y1,x2,y2
        img_rect = [0, 0, ow, oh]
        new_src_rect = rect_intersection(src_rect, img_rect)  # intersection

        dst_rect = [max(0, -pleft), max(0, -ptop), max(0, -pleft) + new_src_rect[2] - new_src_rect[0],
                    max(0, -ptop) + new_src_rect[3] - new_src_rect[1]]
        # cv2.Mat sized
        if (src_rect[0] == 0 and src_rect[1] == 0 and src_rect[2] == img.shape[0] and src_rect[3] == img.shape[1]):
            sized = cv2.resize(img, (w, h), cv2.INTER_LINEAR)
        else:
            cropped = np.zeros([sheight, swidth, 3])
            cropped[:, :, ] = np.mean(img, axis=(0, 1))

            cropped[dst_rect[1]:dst_rect[3], dst_rect[0]:dst_rect[2]] = \
                img[new_src_rect[1]:new_src_rect[3], new_src_rect[0]:new_src_rect[2]]

            # resize
            sized = cv2.resize(cropped, (w, h), cv2.INTER_LINEAR)

        # flip
        if flip:
            # cv2.Mat cropped
            sized = cv2.flip(sized, 1)  # 0 - x-axis, 1 - y-axis, -1 - both axes (x & y)

        # HSV augmentation
        # cv2.COLOR_BGR2HSV, cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2BGR, cv2.COLOR_HSV2RGB
        if dsat != 1 or dexp != 1 or dhue != 0:
            if img.shape[2] >= 3:
                hsv_src = cv2.cvtColor(sized.astype(np.float32), cv2.COLOR_RGB2HSV)  # RGB to HSV
                hsv = cv2.split(hsv_src)
                hsv[1] *= dsat
                hsv[2] *= dexp
                hsv[0] += 179 * dhue
                hsv_src = cv2.merge(hsv)
                sized = np.clip(cv2.cvtColor(hsv_src, cv2.COLOR_HSV2RGB), 0, 255)  # HSV to RGB (the same as previous)
            else:
                sized *= dexp

        if blur:
            if blur == 1:
                dst = cv2.GaussianBlur(sized, (17, 17), 0)
                # cv2.bilateralFilter(sized, dst, 17, 75, 75)
            else:
                ksize = (blur / 2) * 2 + 1
                dst = cv2.GaussianBlur(sized, (ksize, ksize), 0)

            if blur == 1:
                img_rect = [0, 0, sized.cols, sized.rows]
                for b in truth:
                    left = (b.x - b.w / 2.) * sized.shape[1]
                    width = b.w * sized.shape[1]
                    top = (b.y - b.h / 2.) * sized.shape[0]
                    height = b.h * sized.shape[0]
                    roi(left, top, width, height)
                    roi = roi & img_rect
                    dst[roi[0]:roi[0] + roi[2], roi[1]:roi[1] + roi[3]] = sized[roi[0]:roi[0] + roi[2],
                                                                          roi[1]:roi[1] + roi[3]]

            sized = dst

        if gaussian_noise:
            noise = np.array(sized.shape)
            gaussian_noise = min(gaussian_noise, 127)
            gaussian_noise = max(gaussian_noise, 0)
            cv2.randn(noise, 0, gaussian_noise)  # mean and variance
            sized = sized + noise
    except:
        print("OpenCV can't augment image: " + str(w) + " x " + str(h))
        sized = mat

    return sized


def filter_truth(bboxes, dx, dy, sx, sy, xd, yd):
    bboxes[:, 0] -= dx
    bboxes[:, 2] -= dx
    bboxes[:, 1] -= dy
    bboxes[:, 3] -= dy

    bboxes[:, 0] = np.clip(bboxes[:, 0], 0, sx)
    bboxes[:, 2] = np.clip(bboxes[:, 2], 0, sx)

    bboxes[:, 1] = np.clip(bboxes[:, 1], 0, sy)
    bboxes[:, 3] = np.clip(bboxes[:, 3], 0, sy)

    out_box = list(np.where(((bboxes[:, 1] == sy) & (bboxes[:, 3] == sy)) |
                            ((bboxes[:, 0] == sx) & (bboxes[:, 2] == sx)) |
                            ((bboxes[:, 1] == 0) & (bboxes[:, 3] == 0)) |
                            ((bboxes[:, 0] == 0) & (bboxes[:, 2] == 0)))[0])
    list_box = list(range(bboxes.shape[0]))
    for i in out_box:
        list_box.remove(i)
    bboxes = bboxes[list_box]

    bboxes[:, 0] += xd
    bboxes[:, 2] += xd
    bboxes[:, 1] += yd
    bboxes[:, 3] += yd

    return bboxes


def blend_truth_mosaic(out_img, img, bboxes, w, h, cut_x, cut_y, i_mixup,
                       left_shift, right_shift, top_shift, bot_shift):
    left_shift = min(left_shift, w - cut_x)
    top_shift = min(top_shift, h - cut_y)
    right_shift = min(right_shift, cut_x)
    bot_shift = min(bot_shift, cut_y)

    if i_mixup == 0:
        bboxes = filter_truth(bboxes, left_shift, top_shift, cut_x, cut_y, 0, 0)
        out_img[:cut_y, :cut_x] = img[top_shift:top_shift + cut_y, left_shift:left_shift + cut_x]
    if i_mixup == 1:
        bboxes = filter_truth(bboxes, cut_x - right_shift, top_shift, w - cut_x, cut_y, cut_x, 0)
        out_img[:cut_y, cut_x:] = img[top_shift:top_shift + cut_y, cut_x - right_shift:w - right_shift]
    if i_mixup == 2:
        bboxes = filter_truth(bboxes, left_shift, cut_y - bot_shift, cut_x, h - cut_y, 0, cut_y)
        out_img[cut_y:, :cut_x] = img[cut_y - bot_shift:h - bot_shift, left_shift:left_shift + cut_x]
    if i_mixup == 3:
        bboxes = filter_truth(bboxes, cut_x - right_shift, cut_y - bot_shift, w - cut_x, h - cut_y, cut_x, cut_y)
        out_img[cut_y:, cut_x:] = img[cut_y - bot_shift:h - bot_shift, cut_x - right_shift:w - right_shift]

    return out_img, bboxes


def draw_box(img, bboxes):
    for b in bboxes:
        img = cv2.rectangle(img, (b[0], b[1]), (b[2], b[3]), (0, 255, 0), 2)
    return img


class Yolo_dataset(Dataset):
    def __init__(self, lable_path, cfg, train=True):
        super(Yolo_dataset, self).__init__()
        if cfg.mixup == 2:
            print("cutmix=1 - isn't supported for Detector")
            raise
        elif cfg.mixup == 2 and cfg.letter_box:
            print("Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters")
            raise

        self.cfg = cfg
        self.train = train

        truth = {}
        f = open(lable_path, 'r', encoding='utf-8')
        for line in f.readlines():
            data = line.split(" ")
            truth[data[0]] = []
            for i in data[1:]:
                truth[data[0]].append([int(float(j)) for j in i.split(',')])

        self.truth = truth
        self.imgs = list(self.truth.keys())

    def __len__(self):
        return len(self.truth.keys())

    def __getitem__(self, index):
        if not self.train:
            return self._get_val_item(index)
        img_path = self.imgs[index]
        bboxes = np.array(self.truth.get(img_path), dtype=np.float)
        img_path = os.path.join(self.cfg.dataset_dir, img_path)
        use_mixup = self.cfg.mixup
        if random.randint(0, 1):
            use_mixup = 0

        if use_mixup == 3:
            min_offset = 0.2
            cut_x = random.randint(int(self.cfg.w * min_offset), int(self.cfg.w * (1 - min_offset)))
            cut_y = random.randint(int(self.cfg.h * min_offset), int(self.cfg.h * (1 - min_offset)))

        r1, r2, r3, r4, r_scale = 0, 0, 0, 0, 0
        dhue, dsat, dexp, flip, blur = 0, 0, 0, 0, 0
        gaussian_noise = 0

        out_img = np.zeros([self.cfg.h, self.cfg.w, 3])
        out_bboxes = []

        for i in range(use_mixup + 1):
            if i != 0:
                img_path = random.choice(list(self.truth.keys()))
                bboxes = np.array(self.truth.get(img_path), dtype=np.float)
                img_path = os.path.join(self.cfg.dataset_dir, img_path)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if img is None:
                continue
            oh, ow, oc = img.shape
            dh, dw, dc = np.array(np.array([oh, ow, oc]) * self.cfg.jitter, dtype=np.int)

            dhue = rand_uniform_strong(-self.cfg.hue, self.cfg.hue)
            dsat = rand_scale(self.cfg.saturation)
            dexp = rand_scale(self.cfg.exposure)

            pleft = random.randint(-dw, dw)
            pright = random.randint(-dw, dw)
            ptop = random.randint(-dh, dh)
            pbot = random.randint(-dh, dh)

            flip = random.randint(0, 1) if self.cfg.flip else 0

            if (self.cfg.blur):
                tmp_blur = random.randint(0, 2)  # 0 - disable, 1 - blur background, 2 - blur the whole image
                if tmp_blur == 0:
                    blur = 0
                elif tmp_blur == 1:
                    blur = 1
                else:
                    blur = self.cfg.blur

            if self.cfg.gaussian and random.randint(0, 1):
                gaussian_noise = self.cfg.gaussian
            else:
                gaussian_noise = 0

            if self.cfg.letter_box:
                img_ar = ow / oh
                net_ar = self.cfg.w / self.cfg.h
                result_ar = img_ar / net_ar
                # print(" ow = %d, oh = %d, w = %d, h = %d, img_ar = %f, net_ar = %f, result_ar = %f \n", ow, oh, w, h, img_ar, net_ar, result_ar);
                if result_ar > 1:  # sheight - should be increased
                    oh_tmp = ow / net_ar
                    delta_h = (oh_tmp - oh) / 2
                    ptop = ptop - delta_h
                    pbot = pbot - delta_h
                    # print(" result_ar = %f, oh_tmp = %f, delta_h = %d, ptop = %f, pbot = %f \n", result_ar, oh_tmp, delta_h, ptop, pbot);
                else:  # swidth - should be increased
                    ow_tmp = oh * net_ar
                    delta_w = (ow_tmp - ow) / 2
                    pleft = pleft - delta_w
                    pright = pright - delta_w
                    # printf(" result_ar = %f, ow_tmp = %f, delta_w = %d, pleft = %f, pright = %f \n", result_ar, ow_tmp, delta_w, pleft, pright);

            swidth = ow - pleft - pright
            sheight = oh - ptop - pbot

            truth, min_w_h = fill_truth_detection(bboxes, self.cfg.boxes, self.cfg.classes, flip, pleft, ptop, swidth,
                                                  sheight, self.cfg.w, self.cfg.h)
            if (min_w_h / 8) < blur and blur > 1:  # disable blur if one of the objects is too small
                blur = min_w_h / 8

            ai = image_data_augmentation(img, self.cfg.w, self.cfg.h, pleft, ptop, swidth, sheight, flip,
                                         dhue, dsat, dexp, gaussian_noise, blur, truth)

            if use_mixup == 0:
                out_img = ai
                out_bboxes = truth
            if use_mixup == 1:
                if i == 0:
                    old_img = ai.copy()
                    old_truth = truth.copy()
                elif i == 1:
                    out_img = cv2.addWeighted(ai, 0.5, old_img, 0.5)
                    out_bboxes = np.concatenate([old_truth, truth], axis=0)
            elif use_mixup == 3:
                if flip:
                    tmp = pleft
                    pleft = pright
                    pright = tmp
                left_shift = int(min(cut_x, max(0, (-int(pleft) * self.cfg.w / swidth))))
                top_shift = int(min(cut_y, max(0, (-int(ptop) * self.cfg.h / sheight))))
                right_shift = int(min((self.cfg.w - cut_x), max(0, (-int(pright) * self.cfg.w / swidth))))
                bot_shift = int(min(self.cfg.h - cut_y, max(0, (-int(pbot) * self.cfg.h / sheight))))

                out_img, out_bbox = blend_truth_mosaic(out_img, ai, truth.copy(), self.cfg.w, self.cfg.h, cut_x,
                                                       cut_y, i, left_shift, right_shift, top_shift, bot_shift)
                out_bboxes.append(out_bbox)

        if use_mixup == 3:
            out_bboxes = np.concatenate(out_bboxes, axis=0)
        out_bboxes1 = np.zeros([self.cfg.boxes, 5])
        out_bboxes1[:min(out_bboxes.shape[0], self.cfg.boxes)] = out_bboxes[:min(out_bboxes.shape[0], self.cfg.boxes)]
        # print(out_bboxes1[:min(out_bboxes.shape[0], self.cfg.boxes)])
        return out_img, out_bboxes1

    def _get_val_item(self, index):
        img_path = self.imgs[index]
        bboxes_with_cls_id = np.array(self.truth.get(img_path), dtype=np.float)
        img = cv2.imread(os.path.join(self.cfg.dataset_dir, img_path))
        # img_height, img_width = img.shape[:2]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img = cv2.resize(img, (self.cfg.w, self.cfg.h))
        # img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
        num_objs = len(bboxes_with_cls_id)
        target = {}
        # boxes to coco format
        boxes = bboxes_with_cls_id[..., :4]
        boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]  # box width, box height
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = torch.as_tensor(bboxes_with_cls_id[..., -1].flatten(), dtype=torch.int64)
        target['image_id'] = torch.tensor([get_image_id(img_path)])
        target['area'] = (target['boxes'][:, 3]) * (target['boxes'][:, 2])
        target['iscrowd'] = torch.zeros((num_objs,), dtype=torch.int64)
        return img, target


def get_image_id(filename):
    img_id = os.path.splitext(os.path.basename(filename))[0].split('-')[-1]
    return int(img_id)


if __name__ == "__main__":
    from cfg import Cfg
    import matplotlib.pyplot as plt

    random.seed(2020)
    np.random.seed(2020)
    Cfg.dataset_dir = '/mnt/e/Dataset'
    dataset = Yolo_dataset(Cfg.train_label, Cfg)
    for i in range(100):
        out_img, out_bboxes = dataset.__getitem__(i)
        a = draw_box(out_img.copy(), out_bboxes.astype(np.int32))
        plt.imshow(a.astype(np.int32))
        plt.show()
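A sketch of the label-file format that Yolo_dataset.__init__ above parses (paths and ids below are made up): one image per line, followed by space-separated boxes, each box being x1,y1,x2,y2,class_id in absolute pixel coordinates. Note that get_image_id() derives the COCO image id from the digits after the last '-' in the file stem, so validation filenames must end in a numeric suffix:

images/coin-000001.jpg 12,30,96,110,0 140,42,210,118,2
images/coin-000002.jpg 55,60,170,175,1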

5.models.py

import torch
from torch import nn
import torch.nn.functional as F
from tool.torch_utils import *
from tool.yolo_layer import YoloLayer
import sys
import cv2

# The network definition here (Mish, Upsample, Conv_Bn_Activation, ResBlock,
# DownSample1-DownSample5, Neck, Yolov4Head, Yolov4) is identical to the
# listing under "2.test.py" above, with one difference: Neck.forward also
# prints the two skip-connection feature sizes after the first upsample:
#
#         up = self.upsample1(x7, downsample4.size(), self.inference)
#         print(downsample4.size())
#         print(downsample3.size())
#
# The __main__ block runs the original 80-class COCO weights instead:

if __name__ == "__main__":
    from tool.utils import load_class_names, plot_boxes_cv2
    from tool.torch_utils import do_detect

    # parameters
    width = 608
    height = 608
    n_classes = 80
    imgfile = "data/test/dog.jpg"
    # weightfile = "data/model1/yolov4_150.pth"
    # namesfile = "data/dataset/mydata/new_classes.names"
    weightfile = 'data/model/yolov4.pth'
    namesfile = "data/dataset/coco.names"

    model = Yolov4(yolov4conv137weight=None, n_classes=n_classes, inference=True)
    pretrained_dict = torch.load(weightfile, map_location=torch.device('cpu'))
    model.load_state_dict(pretrained_dict)

    img = cv2.imread(imgfile)
    input_img = cv2.resize(img, (width, height))
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)

    for i in range(2):
        boxes = do_detect(model, input_img, 0.4, 0.6, use_cuda=False)

    # if namesfile == None:
    #     if n_classes == 20:
    #         namesfile = 'data/voc.names'
    #     elif n_classes == 80:
    #         namesfile = 'data/coco.names'
    #     else:
    #         print("please give namefile")
    # class_names = load_class_names(namesfile)
    # plot_boxes_cv2(img, boxes[0], 'result/predictions.jpg', class_names)
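Step 5 of the workflow converts the trained checkpoint to ONNX via pytorch2onnx.py. The following is only a minimal sketch of the idea, assuming a 416x416 three-class checkpoint; with inference=True the model returns boxes and confidences from get_region_boxes, hence the two output names:

import torch
from models import Yolov4

model = Yolov4(yolov4conv137weight=None, n_classes=3, inference=True)
model.load_state_dict(torch.load('data/model1/yolov4_200.pth', map_location='cpu'))
model.eval()

dummy = torch.randn(1, 3, 416, 416)  # NCHW input, matching cfg width/height
torch.onnx.export(model, dummy, 'yolov4.onnx',
                  input_names=['input'],
                  output_names=['boxes', 'confs'],
                  opset_version=11)

The resulting yolov4.onnx is what dy_resize.py (step 6) and atc (step 7) consume.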
