简介

当我们使用数据集训练计算机视觉模型时，常常会遇到数据集提供的是多个 xml 格式的标注（annotation）文件，而 YOLO 模型需要的是每张图片对应一个 txt 格式的标注文件。这里提供笔者所用的将 xml 文件转换为 txt 文件的方法。

前期准备

创建项目文件夹

新建文件夹用于储存此项目

创建文件夹用于储存图像和xml标签

此处提供两种方法

代码法

创建python文件并运行以下代码

import os

# Create the VOC-style folder layout used by the conversion script below.
# exist_ok=True keeps the snippet safe to re-run (or to run after the folders
# were already created by hand) instead of raising FileExistsError.
os.makedirs('VOCdevkit/VOC2007/Annotations', exist_ok=True)
os.makedirs('VOCdevkit/VOC2007/JPEGImages', exist_ok=True)

直接法
- 在项目文件夹内创建文件夹命名为VOCdevkit
- 在VOCdevkit文件夹内创建文件夹命名为VOC2007
- 在VOC2007文件夹内创建两个文件夹分别命名为Annotations和JPEGImages
成功后如图所示

导入图像和xml标签

将xml文件全部放入Annotations文件夹下

将图片全部放入JPEGImages文件夹下

进行转换

创建转换代码

在项目文件夹下创建python文件并输入以下代码

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile

# Class names to convert; set this according to your own dataset.
# Objects whose <name> is not in this list are skipped, and an object's
# position in the list is written as its class id in the txt label.
classes = []

# Training ratio; set this according to your own needs.
# Each image draws a random integer in 1..100, which is compared against this
# value to decide whether the image goes to the train or the val split.
TRAIN_RATIO = 70

# Traversing folders
def clear_hidden_files(path):
    """Recursively delete files whose name starts with ``._`` under *path*.

    Such files are presumably macOS metadata leftovers; removing them keeps
    them from being picked up as images or annotations.

    Args:
        path: Directory to clean. Sub-directories are cleaned as well.

    Raises:
        OSError: If *path* cannot be listed or a matching file cannot be
            removed.
    """
    root = os.path.abspath(path)
    for entry in os.listdir(root):
        abspath = os.path.join(root, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        elif os.path.isdir(abspath):
            # Only descend into real directories: a dangling symlink or a
            # special file is neither a file nor a directory and would make
            # os.listdir() fail.
            clear_hidden_files(abspath)


# Normalization of width and height is performed
# Size is the width and height of the original image
def convert(size, box):
    """Convert a pixel-space bounding box to a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box -- note that both x
            values come before both y values.

    Returns:
        ``(x, y, w, h)``: the box center and the box width/height, with x/w
        scaled by ``1 / width`` and y/h scaled by ``1 / height``.

    Raises:
        ZeroDivisionError: If the image width or height is 0.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]

    # Get center point
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0

    # Calculate width and height
    w = box[1] - box[0]
    h = box[3] - box[2]

    # Normalize by the image dimensions
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)


# Parsing xml files
def convert_annotation(image_id):
    """Convert one VOC xml annotation into a txt label file.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt`` (both relative to the
    current working directory; the output folder must already exist). One
    line ``<class id> <x> <y> <w> <h>`` is written per kept object, using
    ``convert`` to normalize the box.

    Objects are skipped when their ``<name>`` is not in ``classes`` or when
    they are marked ``<difficult>1</difficult>``.

    Args:
        image_id: File name of the annotation without the ``.xml`` extension.

    Raises:
        OSError: If the xml file cannot be read or the txt file cannot be
            written.
        xml.etree.ElementTree.ParseError: If the xml file is malformed.
        AttributeError: If ``<size>``, ``<name>`` or ``<bndbox>`` (or one of
            their required children) is missing.
    """
    in_path = 'VOCdevkit/VOC2007/Annotations/%s.xml' % image_id
    out_path = 'VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id

    # Parse before creating the output so that an unreadable or malformed xml
    # file does not leave an empty label file behind.
    with open(in_path, 'rb') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(out_path, 'w') as out_file:
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag is treated as "not difficult"
            # instead of crashing on it.
            difficult = obj.findtext('difficult') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # Order expected by convert(): xmin, xmax, ymin, ymax
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = os.getcwd()


def _ensure_clean_dir(parent, name):
    """Create ``parent/name`` if needed, remove ``._*`` files in it, return its path."""
    path = os.path.join(parent, name)
    os.makedirs(path, exist_ok=True)
    clear_hidden_files(path)
    return path


# Input layout: VOCdevkit/VOC2007/{Annotations,JPEGImages,YOLOLabels}
data_base_dir = os.path.join(wd, "VOCdevkit/")
os.makedirs(data_base_dir, exist_ok=True)
work_space_dir = os.path.join(data_base_dir, "VOC2007/")
os.makedirs(work_space_dir, exist_ok=True)
annotation_dir = _ensure_clean_dir(work_space_dir, "Annotations/")
image_dir = _ensure_clean_dir(work_space_dir, "JPEGImages/")
yolo_labels_dir = _ensure_clean_dir(work_space_dir, "YOLOLabels/")

# Output layout: VOCdevkit/{images,labels}/{train,val}
yolov5_images_dir = _ensure_clean_dir(data_base_dir, "images/")
yolov5_labels_dir = _ensure_clean_dir(data_base_dir, "labels/")
yolov5_images_train_dir = _ensure_clean_dir(yolov5_images_dir, "train/")
yolov5_images_test_dir = _ensure_clean_dir(yolov5_images_dir, "val/")
yolov5_labels_train_dir = _ensure_clean_dir(yolov5_labels_dir, "train/")
yolov5_labels_test_dir = _ensure_clean_dir(yolov5_labels_dir, "val/")

list_imgs = os.listdir(image_dir)  # list image files

# Mode 'w' truncates any list left over from a previous run; the with-block
# closes both list files even if a conversion fails midway.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for image_name in list_imgs:
        image_path = os.path.join(image_dir, image_name)
        if not os.path.isfile(image_path):
            continue

        nameWithoutExtention = os.path.splitext(image_name)[0]
        annotation_path = os.path.join(annotation_dir, nameWithoutExtention + '.xml')
        label_name = nameWithoutExtention + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)

        # Images without a matching xml annotation are left out of both splits.
        if not os.path.exists(annotation_path):
            continue

        # randint(1, 100) is inclusive on both ends, so "<=" sends
        # TRAIN_RATIO percent of the draws to the training split.
        if prob <= TRAIN_RATIO:  # train dataset
            list_file = train_file
            images_out_dir = yolov5_images_train_dir
            labels_out_dir = yolov5_labels_train_dir
        else:  # val dataset
            list_file = test_file
            images_out_dir = yolov5_images_test_dir
            labels_out_dir = yolov5_labels_test_dir

        list_file.write(image_path + '\n')
        convert_annotation(nameWithoutExtention)  # convert label
        copyfile(image_path, os.path.join(images_out_dir, image_name))
        copyfile(label_path, os.path.join(labels_out_dir, label_name))

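根据自己的需求设置类别（代码中的 classes 列表）和训练比（代码中的 TRAIN_RATIO）。注意：classes 默认为空列表，此时所有目标都会被跳过，生成的 txt 文件将是空的，因此必须填入数据集 xml 文件中出现的类别名称。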

运行转换代码

转换完成

转换完成后会在VOCdevkit文件夹下生成images文件夹和labels文件夹，每个文件夹下都会根据所设置的训练比生成train文件夹和val文件夹
images文件夹下会储存图片

labels文件夹下会储存所需的txt文件

后续

若需要使用YOLO模型进行训练，则可直接复制images文件夹和labels文件夹
若只需要使用txt文件，则可复制labels->train和labels->val中的全部txt文件

将多个annotation xml文件转换为多个annotation txt文件相关推荐

csv转为utf8编码_将utf-8编码的csv文件转换为gb2312编码的csv文件
csv文件编码转换:解决utf-8编码的文件在excel打开时中文乱码问题. Python完整代码如下: # -*- coding:utf-8 -*- # date = 20201231 #将utf- ...
xps数据怎么导出为txt_WFP: 读取XPS文件或将word、txt文件转化为XPS文件
读取XPS格式文件或将doc,txt文件转化为XPS文件,效果图如下: 1.XAML页面代码: xmlns="http://schemas.microsoft.com/winfx/2006/ ...
python读取大文件csv_python 快速把超大txt文件转存为csv的实例
python 快速把超大txt文件转存为csv的实例今天项目有个需求,就是把txt文件转为csv,txt之间是空格隔开,转为csv时需要把空格转换为逗号,网上找的一个版本,只需要三行代码,特别犀利: ...
php更改txt文件,如何使用php对txt文件进行修改
如何使用php对txt文件进行修改发布时间:2020-08-03 10:16:50 来源:亿速云阅读:67 作者:Leah 如何使用php对txt文件进行修改?针对这个问题,这篇文章详细介绍了相对 ...
mfc下创建html文件,用MFC怎么创建TXT文件并写入数据
应该是你的路径写法不正确造成的,是不是没有注意'\'的写法,在C++里要表示'\'就要写成'\\'. 比如表示C:\1\2.txt,就应该写成C:\\1\\2.txt 可以用SHFileOperati ...
java读取文件夹下的所有txt文件,java读取文件夹下文件及txt内容
public class PositionController { // 读取txt内容 public static String txt2String(File file) { StringBuil ...
java读取文件并输出_java读取txt文件并输出结果
这篇文章主要介绍了java读取txt文件并输出结果,文中通过示例代码介绍的非常详细,对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下描述: 1.java读取指定txt文件并解析文件 ...
python txt文件处理软件,对python .txt文件读取及数据处理方法总结
1.处理包含数据的文件最近利用python读取txt文件时遇到了一个小问题,就是在计算两个np.narray()类型的数组时,出现了以下错误: 作为一个python新手,遇到这个问题后花费了挺多时间 ...
labelimg标注的VOC格式标签xml文件和yolo格式标签txt文件相互转换
目录 1 labelimg标注VOC格式和yolo格式介绍 1.1 voc格式 1.2 yolo数据格式介绍 2 voc格式数据和yolo格式数据相互转换 2.1 voc转yolo代码 2.2 yol ...

将多个annotation xml文件转换为多个annotation txt文件

简介

前期准备

创建项目文件夹

创建文件夹用于储存图像和xml标签

导入图像和xml标签

进行转换

创建转换代码

运行转换代码

转换完成

后续

将多个annotation xml文件转换为多个annotation txt文件相关推荐

最新文章

热门文章