MOSAIC扩充VOC数据集

使用YOLOv4中的Mosaic数据增强扩充VOC数据集，并保存新的图像和xml标签（亲测可成功运行）

#-- coding:UTF-8 --
from xml.etree import ElementTree as ET  # xml文件解析方法
import numpy as np
import cv2#（3）处理超出边缘的检测框
def _clip_span(low, high, cut, keep_low_side, min_size):
    """Clip the 1-D span ``[low, high]`` against the cut line ``cut``.

    Args:
        low: Smaller coordinate of the span (x1 or y1).
        high: Larger coordinate of the span (x2 or y2).
        cut: Position of the mosaic split line on this axis.
        keep_low_side: True when the visible region is ``<= cut``
            (left/top tiles), False when it is ``>= cut`` (right/bottom).
        min_size: A span that was clipped to fewer than this many pixels
            is flagged for removal.

    Returns:
        ``(low, high, drop)`` with the possibly clipped span and a flag
        telling whether the box must be discarded.
    """
    # A span lying entirely on the hidden side of the cut is discarded.
    drop = low > cut if keep_low_side else high < cut
    # A span straddling (or touching) the cut is clipped to the visible side.
    if low <= cut <= high:
        if keep_low_side:
            high = cut
        else:
            low = cut
        # Only clipped spans are size-checked, as in the original logic.
        if high - low < min_size:
            drop = True
    return low, high, drop


def merge_bboxes(bboxes, cutx, cuty, min_size=5):
    """Clip the boxes of the four mosaic tiles to their visible quadrant.

    The tile order is: 0 = top-left, 1 = top-right, 2 = bottom-left,
    3 = bottom-right. Boxes entirely outside their tile's quadrant are
    removed, boxes crossing a split line are clipped to it, and boxes that
    become narrower or shorter than ``min_size`` after clipping are removed.

    Args:
        bboxes: Sequence with one entry per tile; ``bboxes[i][0]`` holds the
            ``[x1, y1, x2, y2]`` rows of tile ``i``. The clipped coordinates
            are written back into ``bboxes[i][0]`` in place.
        cutx: x position of the vertical split line.
        cuty: y position of the horizontal split line.
        min_size: Minimum width/height (pixels) a clipped box must keep.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A list with one array per tile containing only the surviving,
        clipped boxes. Entries beyond the fourth are returned unmodified.
    """
    merged = []
    for quadrant, image_boxes in enumerate(bboxes):
        boxes = image_boxes[0]
        drop_indices = []  # rows of this tile that must be removed
        if quadrant < 4:
            keep_left = quadrant in (0, 2)
            keep_top = quadrant in (0, 1)
            for index, (x1, y1, x2, y2) in enumerate(boxes):
                x1, x2, drop_x = _clip_span(x1, x2, cutx, keep_left, min_size)
                y1, y2, drop_y = _clip_span(y1, y2, cuty, keep_top, min_size)
                if drop_x or drop_y:
                    drop_indices.append(index)
                # Write the clipped coordinates back for this box.
                boxes[index] = [x1, y1, x2, y2]
        # axis=0 removes whole rows, i.e. whole boxes.
        merged.append(np.delete(boxes, drop_indices, axis=0))
    return merged


# (1) Augment the four input images
def _draw_boxes(image, boxes):
    """Draw every ``[x1, y1, x2, y2]`` row of ``boxes`` onto ``image`` in place."""
    for x1, y1, x2, y2 in boxes:
        # Plain Python ints keep cv2.rectangle happy regardless of array dtype.
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)


def _show_image(window_name, image):
    """Show ``image`` in a window and block until a key is pressed."""
    cv2.imshow(window_name, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def get_random_data(image_list, input_shape, show=True):
    """Build a mosaic image from four images and remap their boxes.

    Each image is scaled by a random factor in [0.6, 0.8) of the output
    size, pasted into its own corner of a black canvas (0 = top-left,
    1 = top-right, 2 = bottom-left, 3 = bottom-right), and the four canvases
    are then stitched together along randomly chosen split lines. Boxes are
    transformed along with their image and finally clipped by
    ``merge_bboxes``.

    Args:
        image_list: One ``[image, boxes]`` entry per tile, where ``boxes``
            is a list of ``[x1, y1, x2, y2]`` rows. At least four entries
            are needed for the stitching step.
        input_shape: ``(height, width)`` of the output mosaic.
        show: When True (default, the original behaviour) every
            intermediate result is displayed and the function blocks until
            a key is pressed in each window.

    Returns:
        ``(new_image, new_boxes)``: the stitched mosaic and the list of
        clipped box arrays, one per tile, as returned by ``merge_bboxes``.
    """
    h, w = input_shape

    # Allowed range for the split lines, as a fraction of the output size.
    min_offset_x = 0.4
    min_offset_y = 0.4
    scale_low = 1 - min(min_offset_x, min_offset_y)  # 0.6
    scale_high = scale_low + 0.2  # 0.8
    new_ar = w / h  # aspect ratio of the output canvas

    image_datas = []  # one full-size canvas per tile
    box_datas = []  # boxes of each tile in canvas coordinates

    # (1) Scale every image and paste it into its corner of a blank canvas.
    for index, frame_list in enumerate(image_list):
        frame = frame_list[0]
        box = np.array(frame_list[1:])
        if box.size == 0:
            # An image without annotations still needs a (1, 0, 4) layout.
            box = np.zeros((1, 0, 4), dtype=int)
        ih, iw = frame.shape[0:2]

        # Box centres in the original image.
        cx = (box[0, :, 0] + box[0, :, 2]) // 2
        cy = (box[0, :, 1] + box[0, :, 3]) // 2

        scale = np.random.uniform(scale_low, scale_high)
        nh = int(scale * h)
        nw = int(nh * new_ar)
        frame = cv2.resize(frame, (nw, nh))

        # Box centres and sizes in the resized image.
        cx = cx * nw / iw
        cy = cy * nh / ih
        bw = (box[0, :, 2] - box[0, :, 0]) * nw / iw
        bh = (box[0, :, 3] - box[0, :, 1]) * nh / ih

        new_frame = np.zeros((h, w, 3), np.uint8)
        if index < 4:
            # Right-hand tiles are flush with the right edge, bottom tiles
            # with the bottom edge; the offset is zero otherwise.
            x_off = 0 if index in (0, 2) else w - nw
            y_off = 0 if index in (0, 1) else h - nh
            new_frame[y_off:y_off + nh, x_off:x_off + nw] = frame

            # Columns are always x1, y1, x2, y2. The bottom-right tile used
            # to write them into swapped columns, which broke clipping.
            box[0, :, 0] = cx - bw // 2 + x_off  # x1
            box[0, :, 1] = cy - bh // 2 + y_off  # y1
            box[0, :, 2] = cx + bw // 2 + x_off  # x2
            box[0, :, 3] = cy + bh // 2 + y_off  # y2

        image_datas.append(new_frame)
        box_datas.append(box)

    if show:
        # Preview every tile with its transformed boxes.
        for image, boxes in zip(image_datas, box_datas):
            image_copy = image.copy()
            _draw_boxes(image_copy, boxes[0])
            _show_image('img', image_copy)

    # (2) Stitch the four canvases together along random split lines.
    cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
    cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

    new_image = np.zeros((h, w, 3), np.uint8)
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[:cuty, cutx:, :] = image_datas[1][:cuty, cutx:, :]
    new_image[cuty:, :cutx, :] = image_datas[2][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[3][cuty:, cutx:, :]

    if show:
        _show_image('new_img', new_image)

        # Mosaic with the boxes before clipping.
        final_image_copy = new_image.copy()
        for boxes in box_datas:
            _draw_boxes(final_image_copy, boxes[0])
        _show_image('new_img_bbox', final_image_copy)

    # Clip or remove boxes that cross the split lines.
    new_boxes = merge_bboxes(box_datas, cutx, cuty)

    if show:
        # Mosaic with the clipped boxes.
        modify_image_copy = new_image.copy()
        for boxes in new_boxes:
            _draw_boxes(modify_image_copy, boxes)
        _show_image('new_img_bbox', modify_image_copy)

    return new_image, new_boxes


# Main entry point: locate the image and annotation files
def load_voc_boxes(annotation_path):
    """Read the bounding boxes from a Pascal VOC style XML annotation.

    Args:
        annotation_path: Path of the XML file, or an open file object,
            as accepted by ``ElementTree.parse``.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
        ``<object>`` element directly under the root, in document order.
    """
    root = ET.parse(annotation_path).getroot()
    boxes = []
    # findall only looks at direct children of the root node.
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        # int(float(...)) parses "12" and "12.0" alike without resorting to
        # eval() on file content.
        boxes.append([
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ])
    return boxes


if __name__ == '__main__':
    # Folders holding the images and their XML annotations.
    image_dir = '/home/conan/zbj/3/1/72/img/'
    annotation_dir = '/home/conan/zbj/3/1/72/xml'

    image_list = []  # one [image, boxes] entry per input image

    # Load the four images (1.jpg .. 4.jpg) and their boxes.
    for i in range(4):
        image_path = image_dir + str(i+1) + '.jpg'
        annotation_path = annotation_dir + '/'+str(i+1) + '.xml'
        print("anno is ", annotation_path)
        print("i is ", i)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(image_path)

        image_box = load_voc_boxes(annotation_path)  # [[x1,y1,x2,y2], ...]
        image_list.append([image, image_box])

    # Scale, stitch and display the mosaic.
    get_random_data(image_list, input_shape=[416,416])

结果

四张图像合成一张并生成新的xml

参考：https://blog.csdn.net/wilbur520/article/details/107760805

MOSAIC扩充VOC数据集相关推荐

把VOC数据集转化成txt文件python
主要是网上一些已有并已用了觉得还可以的,同时记录方便以后查询使用. 第一个代码是生成VOC数据集的无后缀文件名于txt文件中,如下所示: E84.27832967649_N46.23271680371 ...
PASCAL VOC数据集分析（分类部分）
PASCAL VOC数据集分析 PASCAL VOC为图像识别和分类提供了一整套标准化的优秀的数据集,从2005年到2012年每年都会举行一场图像识别challenge. 每一年都有自己的数据集.pa ...
【学习记录】win10搭建YOLOX训练自己的VOC数据集
我是目录: 前言: 1.yolox的训练配置 2.yolox源码 3.必要的环境配置yolox所需环境 1.安装依赖库 2.安装yolox 3.安装apex 4.下载预训练模型 4.准备自己的数据集 ...
【目标检测实战学习】从零开始制作并训练自己的VOC数据集，并使用Retinanet进行目标检测
目录基础软件安装项目来源环境配置使用LabelImg给图片打标签数据增强划分训练集,测试集模型训练将验证集结果可视化首先写一下我们这个项目的思路 1.下载图片,网上随便找 2.使用L ...
pythonvbb转换txt_Caltech行人数据集转化VOC数据集
Caltech行人数据集转化VOC数据集发布时间:2018-04-09 17:31, 浏览次数:1213 , 标签: Caltech VOC CaltechPestrian2VOC 代码仓库地址 1 ...
map评价吗 voc数据集可以用coco_【庖丁解牛】从零实现RetinaNet（九）：使用COCO预训练权重在VOC上训练RetinaNet...
下列代码均在pytorch1.4版本中测试过,确认正确无误. 如何载入COCO预训练权重由于其他数据集如VOC的类别数不一定和COCO数据集相同,载入COCO预训练权重后要先去掉和类别有关的卷积层权 ...
Ubuntu上用caffe的SSD方法训练Pascal VOC数据集
实验目的继caffe和ssd搭建完成,demo也演示完毕,了解一些基本知识后,现在开始训练自己的数据集,在给自己的训练集处理之前,先跟着官方的示例将Pascal VOC数据集训练走一遍,把可能会踩坑 ...
RCNN系列实验的PASCAL VOC数据集格式设置
我们在做RCNN系列的实验时,往往需要把数据集的格式设置为和PASCAL VOC数据集一样的格式,其实当然也可以修改读取数据的代码,只是这样更为麻烦,自己的数据格式变了又得修改. 首先以VOC200 ...
Deep Learning---py-faster-rcnn基于PASCAL VOC数据集训练模型
0 说明系统环境为Ubuntu14.04, 已经安装好了CUDA和cuDNN以及Python等基础包. 1 设置和编译py-faster-rcnn 1.1 下载py-faster-rcnn $ gi ...

MOSAIC扩充VOC数据集

MOSAIC扩充VOC数据集相关推荐

最新文章

热门文章