I won't go into how the YOLO algorithm works here; there are plenty of resources online for that. This post only covers how to actually get YOLO running. Alright then, code first...

YOLOv3 code

This is part of the code: it runs object detection by loading a pre-trained YOLOv3 model. Pay attention to the commented lines in the code and change those paths to your own. A short usage sketch follows the code.

import argparse
import os
import numpy as np
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
from keras.layers.merge import add, concatenate
from keras.models import Model
import struct
import cv2

np.set_printoptions(threshold=np.nan)

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

argparser = argparse.ArgumentParser(description='test yolov3 network with coco weights')
# change the default paths to your own (in the original post the paths had been pasted into the flag names)
argparser.add_argument('-w', '--weights', default='C:\\Users\\new\\Desktop\\yolov3.weights', help='path to weights file')
argparser.add_argument('-i', '--image', default='C:\\Users\\new\\Desktop\\dog.jpg', help='path to image file')

class WeightReader:
    def __init__(self, weight_file):
        # weight_file is the path to the pretrained .weights file; a download link is given at the end
        with open(weight_file, 'rb') as w_f:
            major,    = struct.unpack('i', w_f.read(4))
            minor,    = struct.unpack('i', w_f.read(4))
            revision, = struct.unpack('i', w_f.read(4))

            if (major*10 + minor) >= 2 and major < 1000 and minor < 1000:
                w_f.read(8)
            else:
                w_f.read(4)

            transpose = (major > 1000) or (minor > 1000)
            binary = w_f.read()

        self.offset = 0
        self.all_weights = np.frombuffer(binary, dtype='float32')

    def read_bytes(self, size):
        self.offset = self.offset + size
        return self.all_weights[self.offset-size:self.offset]

    def load_weights(self, model):
        for i in range(106):
            try:
                conv_layer = model.get_layer('conv_' + str(i))
                print("loading weights of convolution #" + str(i))

                if i not in [81, 93, 105]:
                    norm_layer = model.get_layer('bnorm_' + str(i))
                    size = np.prod(norm_layer.get_weights()[0].shape)

                    beta  = self.read_bytes(size)  # bias
                    gamma = self.read_bytes(size)  # scale
                    mean  = self.read_bytes(size)  # mean
                    var   = self.read_bytes(size)  # variance

                    norm_layer.set_weights([gamma, beta, mean, var])

                if len(conv_layer.get_weights()) > 1:
                    bias   = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
                    kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
                    kernel = kernel.transpose([2,3,1,0])
                    conv_layer.set_weights([kernel, bias])
                else:
                    kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
                    kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
                    kernel = kernel.transpose([2,3,1,0])
                    conv_layer.set_weights([kernel])
            except ValueError:
                print("no convolution #" + str(i))

    def reset(self):
        self.offset = 0

class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        return self.score

def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0

    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1

        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x)  # peculiar padding as darknet prefers left and top
        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same',  # peculiar padding as darknet prefers left and top
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
            return 0
        else:
            return min(x2,x4) - x3

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])

    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin

    union = w1*h1 + w2*h2 - intersect

    return float(intersect) / union

def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
    skip_36 = x

    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
    skip_61 = x

    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])

    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])

    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)

    # Layer 80 => 82
    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

    # Layer 83 => 86
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

    # Layer 92 => 94
    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

    # Layer 95 => 98
    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])
    return model

def preprocess_input(image, net_h, net_w):
    new_h, new_w, _ = image.shape

    # determine the new size of the image
    if (float(net_w)/new_w) < (float(net_h)/new_h):
        new_h = (new_h * net_w)/new_w
        new_w = net_w
    else:
        new_w = (new_w * net_h)/new_h
        new_h = net_h

    # resize the image to the new size
    resized = cv2.resize(image[:,:,::-1]/255., (int(new_w), int(new_h)))

    # embed the image into the standard letter box
    new_image = np.ones((net_h, net_w, 3)) * 0.5
    new_image[int((net_h-new_h)//2):int((net_h+new_h)//2), int((net_w-new_w)//2):int((net_w+new_w)//2), :] = resized
    new_image = np.expand_dims(new_image, 0)

    return new_image

def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    nb_class = netout.shape[-1] - 5

    boxes = []

    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w

        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]

            if(objectness.all() <= obj_thresh): continue

            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]

            x = (col + x) / grid_w  # center position, unit: image width
            y = (row + y) / grid_h  # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]

            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)

    return boxes

def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        new_h = net_w
        new_w = (image_w*net_h)/image_h

    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h

        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)

def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return

    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])

        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]

            if boxes[index_i].classes[c] == 0: continue

            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]

                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0

def draw_boxes(image, boxes, labels, obj_thresh):
    for box in boxes:
        label_str = ''
        label = -1

        for i in range(len(labels)):
            if box.classes[i] > obj_thresh:
                label_str += labels[i]
                label = i
                print(labels[i] + ': ' + str(box.classes[i]*100) + '%')

        if label >= 0:
            cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), (0,255,0), 2)
            cv2.putText(image,
                        label_str + ' ' + str(box.get_score()),
                        (box.xmin, box.ymin - 13),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1e-3 * image.shape[0],
                        (0,255,0), 1)

    return image

def _main_(args):
    weights_path = args.weights
    image_path   = 'C:\\Users\\new\\Desktop\\detect_image'

    # set some parameters
    net_h, net_w = 416, 416
    obj_thresh, nms_thresh = 0.5, 0.45
    anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
              "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
              "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
              "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
              "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
              "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
              "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
              "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
              "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
              "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

    # make the yolov3 model to predict 80 classes on COCO
    yolov3 = make_yolov3_model()

    # load the weights trained on COCO into the model
    weight_reader = WeightReader(weights_path)
    weight_reader.load_weights(yolov3)

    # preprocess the image to be detected (passed via -i; the original post hard-coded C:\\Users\\new\\Desktop\\11.jpg here)
    image = cv2.imread(args.image)
    image_h, image_w, _ = image.shape
    new_image = preprocess_input(image, net_h, net_w)

    # run the prediction
    yolos = yolov3.predict(new_image)
    boxes = []

    for i in range(len(yolos)):
        # decode the output of the network
        boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w)

    # correct the sizes of the bounding boxes
    correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

    # suppress non-maximal boxes
    do_nms(boxes, nms_thresh)

    # draw bounding boxes on the image using labels
    draw_boxes(image, boxes, labels, obj_thresh)

    cv2.namedWindow('image', 0)
    cv2.imshow('image', image)
    cv2.waitKey(0)

    # write the image with bounding boxes to file
    cv2.imwrite('C:\\Users\\new\\Desktop\\detect_image\\detection.jpg', image)

if __name__ == '__main__':
    args = argparser.parse_args()
    _main_(args)
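With the hard-coded paths changed to your own, the script runs from the command line; the -w and -i flags come from the argparse definitions at the top (and since both have defaults, they can also be omitted). For example, assuming the file is saved under the repo's name yolo3_one_file_to_detect_them_all.py:

python yolo3_one_file_to_detect_them_all.py -w C:\Users\new\Desktop\yolov3.weights -i C:\Users\new\Desktop\dog.jpg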

Configuration before training

Training here uses raccoon images; the raccoon dataset can be downloaded online (see the source-code links at the end). Once downloaded, the data is configured through config_raccoon.json in the code, as shown below. It is mainly the paths to the training data plus a few hyperparameters!

{"model" : {"min_input_size":       352,"max_input_size":       448,"anchors":              [17,18, 28,24, 36,34, 42,44, 56,51, 72,66, 90,95, 92,154, 139,281],"labels":               ["raccoon"]},"train": {"train_image_folder":   "C:\\Users\\new\\Desktop\\raccoon_dataset-master\\images\\","train_annot_folder":   "C:\\Users\\new\\Desktop\\raccoon_dataset-master\\annotations\\","cache_name":           "raccoon_train.pkl","train_times":          1,"batch_size":           1,"learning_rate":        1e-4,"nb_epochs":            20,"warmup_epochs":        3,"ignore_thresh":        0.5,"gpus":                 "0,1","grid_scales":          [1,1,1],"obj_scale":            5,"noobj_scale":          1,"xywh_scale":           1,"class_scale":          1,"tensorboard_dir":      "log_raccoon","saved_weights_name":   "raccoon.h5","debug":                true},"valid": {"valid_image_folder":   "","valid_annot_folder":   "","cache_name":           "","valid_times":          1}}

Once that file is set up, go to gen_anchors.py in the source code and configure it as well; this step just points it at the same .json config, so it's simple!
Next comes train.py, which is configured the same way, again through the .json file. A sketch of what gen_anchors does follows, and after it the training code itself, train.py:
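gen_anchors.py essentially clusters the widths and heights of the ground-truth boxes to produce the nine anchors listed in the config. As a rough illustration, here is a minimal k-means sketch using plain Euclidean distance; the repo's actual implementation clusters with a 1-IoU distance instead, so treat this only as the shape of the idea:

import numpy as np

# simplified sketch of anchor clustering: plain k-means on (w, h) pairs.
# gen_anchors.py in the repo uses a 1-IoU distance rather than Euclidean
# distance; this only illustrates the overall procedure.
def kmeans_anchors(box_wh, k=9, iters=100, seed=0):
    rng = np.random.RandomState(seed)
    centroids = box_wh[rng.choice(len(box_wh), k, replace=False)]

    for _ in range(iters):
        # assign each box to the nearest centroid
        dists = np.linalg.norm(box_wh[:, None, :] - centroids[None, :, :], axis=-1)
        assign = dists.argmin(axis=1)
        # move each centroid to the mean of its assigned boxes
        for c in range(k):
            if np.any(assign == c):
                centroids[c] = box_wh[assign == c].mean(axis=0)

    # sort by area, since the config lists anchors from small to large
    return centroids[np.argsort(centroids.prod(axis=1))]

if __name__ == '__main__':
    # fake (w, h) pairs just to exercise the sketch
    fake_boxes = np.random.RandomState(1).uniform(10, 300, size=(200, 2))
    print(kmeans_anchors(fake_boxes).astype(int))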

#! /usr/bin/env python
import argparse
import os
import numpy as np
import json
from voc import parse_voc_annotation
from yolo import create_yolov3_model, dummy_loss
from generator import BatchGenerator
from utils.utils import normalize, evaluate, makedirs
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam
from callbacks import CustomModelCheckpoint, CustomTensorBoard
from utils.multi_gpu_model import multi_gpu_model
import tensorflow as tf
#import keras
from keras.models import load_model

def create_training_instances(
    train_annot_folder,
    train_image_folder,
    train_cache,
    valid_annot_folder,
    valid_image_folder,
    valid_cache,
    labels,
):
    # parse annotations of the training set
    train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(valid_annot_folder):
        valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels)
    else:
        print("valid_annot_folder does not exist. Splitting the training set.")

        train_valid_split = int(0.8*len(train_ints))
        np.random.seed(0)
        np.random.shuffle(train_ints)
        np.random.seed()

        valid_ints = train_ints[train_valid_split:]
        train_ints = train_ints[:train_valid_split]

    # compare the seen labels with the given labels in config.json
    if len(labels) > 0:
        overlap_labels = set(labels).intersection(set(train_labels.keys()))

        print('Seen labels: \t'  + str(train_labels) + '\n')
        print('Given labels: \t' + str(labels))

        # return None, None, None if some given label is not in the dataset
        if len(overlap_labels) < len(labels):
            print('Some labels have no annotations! Please revise the list of labels in the config.json.')
            return None, None, None
    else:
        print('No labels are provided. Train on all seen labels.')
        print(train_labels)
        labels = train_labels.keys()

    max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])

    return train_ints, valid_ints, sorted(labels), max_box_per_image

def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
    makedirs(tensorboard_logs)

    early_stop = EarlyStopping(
        monitor     = 'loss',
        min_delta   = 0.01,
        patience    = 5,
        mode        = 'min',
        verbose     = 1
    )
    checkpoint = CustomModelCheckpoint(
        model_to_save   = model_to_save,
        filepath        = saved_weights_name,  # + '{epoch:02d}.h5',
        monitor         = 'loss',
        verbose         = 1,
        save_best_only  = True,
        mode            = 'min',
        period          = 1
    )
    reduce_on_plateau = ReduceLROnPlateau(
        monitor  = 'loss',
        factor   = 0.1,
        patience = 2,
        verbose  = 1,
        mode     = 'min',
        epsilon  = 0.01,
        cooldown = 0,
        min_lr   = 0
    )
    tensorboard = CustomTensorBoard(
        log_dir      = tensorboard_logs,
        write_graph  = True,
        write_images = True,
    )
    return [early_stop, checkpoint, reduce_on_plateau, tensorboard]

def create_model(
    nb_class,
    anchors,
    max_box_per_image,
    max_grid,
    batch_size,
    warmup_batches,
    ignore_thresh,
    multi_gpu,
    saved_weights_name,
    lr,
    grid_scales,
    obj_scale,
    noobj_scale,
    xywh_scale,
    class_scale
):
    if multi_gpu > 1:
        with tf.device('/cpu:0'):
            template_model, infer_model = create_yolov3_model(
                nb_class            = nb_class,
                anchors             = anchors,
                max_box_per_image   = max_box_per_image,
                max_grid            = max_grid,
                batch_size          = batch_size//multi_gpu,
                warmup_batches      = warmup_batches,
                ignore_thresh       = ignore_thresh,
                grid_scales         = grid_scales,
                obj_scale           = obj_scale,
                noobj_scale         = noobj_scale,
                xywh_scale          = xywh_scale,
                class_scale         = class_scale
            )
    else:
        template_model, infer_model = create_yolov3_model(
            nb_class            = nb_class,
            anchors             = anchors,
            max_box_per_image   = max_box_per_image,
            max_grid            = max_grid,
            batch_size          = batch_size,
            warmup_batches      = warmup_batches,
            ignore_thresh       = ignore_thresh,
            grid_scales         = grid_scales,
            obj_scale           = obj_scale,
            noobj_scale         = noobj_scale,
            xywh_scale          = xywh_scale,
            class_scale         = class_scale
        )

    # load the pretrained weights if they exist, otherwise load the backend weights only
    if os.path.exists(saved_weights_name):
        print("\nLoading pretrained weights.\n")
        template_model.load_weights(saved_weights_name)
    else:
        # backend.h5 is a pretrained backbone; the downloaded .h5 is required here.
        # Train it yourself or download trained weights: https://1drv.ms/u/s!ApLdDEW3ut5fgQXa7GzSlG-mdza6
        template_model.load_weights("backend.h5", by_name=True)

    if multi_gpu > 1:
        train_model = multi_gpu_model(template_model, gpus=multi_gpu)
    else:
        train_model = template_model

    optimizer = Adam(lr=lr, clipnorm=0.001)
    train_model.compile(loss=dummy_loss, optimizer=optimizer)

    return train_model, infer_model

def _main_(args):
    # use -c/--conf if given; the original post hard-coded the raccoon config here
    config_path = args.conf or "./zoo/config_raccoon.json"

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    train_ints, valid_ints, labels, max_box_per_image = create_training_instances(
        config['train']['train_annot_folder'],
        config['train']['train_image_folder'],
        config['train']['cache_name'],
        config['valid']['valid_annot_folder'],
        config['valid']['valid_image_folder'],
        config['valid']['cache_name'],
        config['model']['labels']
    )
    print('\nTraining on: \t' + str(labels) + '\n')

    ###############################
    #   Create the generators
    ###############################
    train_generator = BatchGenerator(
        instances           = train_ints,
        anchors             = config['model']['anchors'],
        labels              = labels,
        downsample          = 32,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image   = max_box_per_image,
        batch_size          = config['train']['batch_size'],
        min_net_size        = config['model']['min_input_size'],
        max_net_size        = config['model']['max_input_size'],
        shuffle             = True,
        jitter              = 0.3,
        norm                = normalize
    )

    valid_generator = BatchGenerator(
        instances           = valid_ints,
        anchors             = config['model']['anchors'],
        labels              = labels,
        downsample          = 32,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image   = max_box_per_image,
        batch_size          = config['train']['batch_size'],
        min_net_size        = config['model']['min_input_size'],
        max_net_size        = config['model']['max_input_size'],
        shuffle             = True,
        jitter              = 0.0,
        norm                = normalize
    )

    ###############################
    #   Create the model
    ###############################
    if os.path.exists(config['train']['saved_weights_name']):
        config['train']['warmup_epochs'] = 0
    warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator))

    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
    multi_gpu = len(config['train']['gpus'].split(','))

    train_model, infer_model = create_model(
        nb_class            = len(labels),
        anchors             = config['model']['anchors'],
        max_box_per_image   = max_box_per_image,
        max_grid            = [config['model']['max_input_size'], config['model']['max_input_size']],
        batch_size          = config['train']['batch_size'],
        warmup_batches      = warmup_batches,
        ignore_thresh       = config['train']['ignore_thresh'],
        multi_gpu           = multi_gpu,
        saved_weights_name  = config['train']['saved_weights_name'],
        lr                  = config['train']['learning_rate'],
        grid_scales         = config['train']['grid_scales'],
        obj_scale           = config['train']['obj_scale'],
        noobj_scale         = config['train']['noobj_scale'],
        xywh_scale          = config['train']['xywh_scale'],
        class_scale         = config['train']['class_scale'],
    )

    ###############################
    #   Kick off the training
    ###############################
    callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model)

    train_model.fit_generator(
        generator        = train_generator,
        steps_per_epoch  = len(train_generator) * config['train']['train_times'],
        epochs           = config['train']['nb_epochs'] + config['train']['warmup_epochs'],
        verbose          = 2 if config['train']['debug'] else 1,
        callbacks        = callbacks,
        workers          = 4,
        max_queue_size   = 8
    )

    # make a GPU version of infer_model for evaluation
    if multi_gpu > 1:
        infer_model = load_model(config['train']['saved_weights_name'])

    ###############################
    #   Run the evaluation
    ###############################
    # compute mAP for all the classes
    average_precisions = evaluate(infer_model, valid_generator)

    # print the score
    for label, average_precision in average_precisions.items():
        print(labels[label] + ': {:.4f}'.format(average_precision))
    print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset')
    argparser.add_argument('-c', '--conf', help='path to configuration file')

    args = argparser.parse_args()
    _main_(args)
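With the config in place, training can then be launched from the repo root; the -c flag matches the argparse definition at the bottom of train.py (if -c is omitted, the script above falls back to the raccoon config):

python train.py -c zoo/config_raccoon.json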

Test results

Since training was done on the raccoon dataset (download link at the end), only raccoons are tested here, using the predict.py file from the source code. The test results are as follows:
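For reference, predict.py takes the same config plus an input path; if memory serves, the flags are -c and -i, but check the repo's README, as this is an assumption. The image path below is just a placeholder, so point it at your own test image:

python predict.py -c zoo/config_raccoon.json -i C:\Users\new\Desktop\raccoon_test.jpg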

Note

The images are fairly large, and my laptop kept running out of GPU memory during training, so it's best to find a machine with a decent graphics card! A couple of workarounds are sketched below.
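Two things that may help when GPU memory is tight: lower batch_size and max_input_size in the config, and let TensorFlow allocate GPU memory on demand instead of grabbing it all up front. A minimal sketch for the TF 1.x / Keras 2.x versions this code targets (place it near the top of train.py, before the model is built):

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# let TensorFlow grow GPU memory usage on demand rather than reserving it all at once
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
set_session(tf.Session(config=tf_config))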

Source code links

The keras-yolo3-master source code, the YOLOv3 model, and the trained weights backend.h5 can be run directly; this is based on tensorflow-1.2.0 + keras 2.1.2. Note: the code above covers only the yolo3_one_file_to_detect_them_all.py and train.py files from the source; the repo also includes the evaluation (evaluate) step.
