(仅用于自己学习时记录)

在使用YOLOv5训练Tsinghua-Tencent 100K(TT100K)数据集时遇到一个问题:官方提供的标注为json格式,而YOLO需要txt格式。我在网上查阅了相关资料,并在此记录转换方法(json → xml → txt)。
Tsinghua-Tencent 100k数据集下载官网:https://cg.cs.tsinghua.edu.cn/traffic-sign/
相关参考资料:https://gitcode.net/mirrors/halftop/TT100K_YOLO_Label?utm_source=csdn_github_accelerator

1. json转xml

import os
import json
from lxml import etree as ET
from xml.dom import minidom

# Path of the JSON annotation file; adjust to your local layout.
filedir = "annotations.json"


def edit_xml(objects, id, dir):
    """Write the XML annotation file for one image.

    The file has an ``annotation`` root holding fixed header fields, a
    ``size`` element and one ``object``/``bndbox`` element per entry of
    *objects*. It is saved as ``<dir>/<id>.xml`` and pretty-printed with tab
    indentation.

    Args:
        objects: Iterable of dicts, each with a ``"category"`` string and a
            ``"bbox"`` dict containing ``xmin``/``ymin``/``xmax``/``ymax``.
        id: Image id (string); used for the file name and ``<filename>``.
        dir: Output directory. Created if it does not exist yet.
    """
    # The original crashed on the first write when the directory was missing.
    os.makedirs(dir, exist_ok=True)
    save_xml_path = os.path.join(dir, "%s.xml" % id)

    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = "none"
    ET.SubElement(root, "filename").text = id + ".jpg"
    ET.SubElement(root, "source").text = "none"
    ET.SubElement(root, "owner").text = "halftop"

    # Image size is hard-coded.
    # NOTE(review): presumably every image in the dataset is 2048x2048x3 -
    # confirm before reusing this on other data.
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(2048)
    ET.SubElement(size, "height").text = str(2048)
    ET.SubElement(size, "depth").text = "3"

    ET.SubElement(root, "segmented").text = "0"

    for obj in objects:
        obj_node = ET.SubElement(root, "object")
        ET.SubElement(obj_node, "name").text = obj["category"]
        ET.SubElement(obj_node, "pose").text = "Unspecified"
        ET.SubElement(obj_node, "truncated").text = "0"
        ET.SubElement(obj_node, "difficult").text = "0"

        bndbox = ET.SubElement(obj_node, "bndbox")
        # Coordinates are truncated to integers, as in the original script.
        for key in ("xmin", "ymin", "xmax", "ymax"):
            ET.SubElement(bndbox, key).text = str(int(obj["bbox"][key]))

    # Serialise in memory and pretty-print once, instead of writing the file,
    # parsing it back from disk and writing it a second time.
    xml_bytes = ET.tostring(root, encoding="utf-8")
    pretty_xml = minidom.parseString(xml_bytes).toprettyxml(indent="\t")
    with open(save_xml_path, "w", encoding="utf-8") as xml_file:
        xml_file.write(pretty_xml)


def getDirId(dir):
    """Return the ids of all entries in *dir*.

    The id is the part of each directory entry name before the first ``.``;
    entries are not filtered by extension.
    """
    return [name.split(".")[0] for name in os.listdir(dir)]


def main():
    """Convert the JSON annotations into one XML file per annotated image."""
    with open(filedir, encoding="utf-8") as json_file:
        annos = json.load(json_file)

    # Sets give O(1) membership tests inside the per-image loop.
    train_ids = set(getDirId("train/"))
    test_ids = set(getDirId("test/"))

    for img_id, img_info in annos["imgs"].items():
        objects = img_info["objects"]
        # Only images that contain at least one annotated object and that are
        # present in the train/ or test/ folder get an XML file.
        if len(objects) == 0:
            continue
        if img_id in train_ids:
            edit_xml(objects, img_id, dir="xmlLabel/train")
        elif img_id in test_ids:
            edit_xml(objects, img_id, dir="xmlLabel/test")


if __name__ == "__main__":
    main()

直接使用即可,注意路径

test和train中存放相应的图片数据,xmlLabel中存放转化后的xml格式的标签。

2. xml转txt

# -*- coding: utf-8 -*-
from xml.dom import minidom
import os
import glob

# Every sign category known to the converter, in the original table order.
_SIGN_NAMES = (
    "i1", "i10", "i11", "i12", "i13", "i14", "i15", "i2", "i3", "i4", "i5",
    "il100", "il110", "il50", "il60", "il70", "il80", "il90", "io", "ip",
    "p1", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18",
    "p19", "p2", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27",
    "p28", "p3", "p4", "p5", "p6", "p7", "p8", "p9",
    "pa10", "pa12", "pa13", "pa14", "pa8", "pb", "pc", "pg",
    "ph1.5", "ph2", "ph2.1", "ph2.2", "ph2.4", "ph2.5", "ph2.8", "ph2.9",
    "ph3", "ph3.2", "ph3.5", "ph3.8", "ph4", "ph4.2", "ph4.3", "ph4.5",
    "ph4.8", "ph5", "ph5.3", "ph5.5",
    "pl10", "pl100", "pl110", "pl120", "pl15", "pl20", "pl25", "pl30",
    "pl35", "pl40", "pl5", "pl50", "pl60", "pl65", "pl70", "pl80", "pl90",
    "pm10", "pm13", "pm15", "pm1.5", "pm2", "pm20", "pm25", "pm30", "pm35",
    "pm40", "pm46", "pm5", "pm50", "pm55", "pm8",
    "pn", "pne", "po",
    "pr10", "pr100", "pr20", "pr30", "pr40", "pr45", "pr50", "pr60", "pr70",
    "pr80",
    "ps",
    "pw2", "pw2.5", "pw3", "pw3.2", "pw3.5", "pw4", "pw4.2", "pw4.5",
    "w1", "w10", "w12", "w13", "w16", "w18", "w20", "w21", "w22", "w24",
    "w28", "w3", "w30", "w31", "w32", "w34", "w35", "w37", "w38", "w41",
    "w42", "w43", "w44", "w45", "w46", "w47", "w48", "w49", "w5", "w50",
    "w55", "w56", "w57", "w58", "w59", "w60", "w62", "w63", "w66", "w8",
    "wo",
    "i6", "i7", "i8", "i9", "ilx",
    "p29",
    "w29", "w33", "w36", "w39", "w4", "w40", "w51", "w52", "w53", "w54",
    "w6", "w61", "w64", "w65", "w67", "w7", "w9",
    "pax", "pd", "pe", "phx", "plx", "pmx", "pnl", "prx", "pwx",
    "w11", "w14", "w15", "w17", "w19", "w2", "w23", "w25", "w26", "w27",
    "pl0", "pl4", "pl3", "pm2.5", "ph4.4", "pn40", "ph3.3", "ph2.6",
)

# In the original table every "i*" name maps to 0, every "p*" name to 1 and
# every "w*" name to 2.
# NOTE(review): presumably indication / prohibition / warning signs - confirm
# against the dataset documentation.
_CLASS_BY_PREFIX = {"i": 0, "p": 1, "w": 2}

# Look-up table: sign category name -> coarse class id (0, 1 or 2).
lut = {name: _CLASS_BY_PREFIX[name[0]] for name in _SIGN_NAMES}


def convert_coordinates(size, box):
    """Convert a pixel box to normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels (note the x-first order).

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width or height respectively.
    """
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def _child_text(node, tag):
    """Return the text of the first descendant of *node* named *tag*."""
    return node.getElementsByTagName(tag)[0].firstChild.data


def convert_xml2yolo(lut, xml_dir=None):
    """Write a ``.txt`` label file next to every ``.xml`` annotation file.

    Each output line is ``<class> <x> <y> <w> <h>`` with the four box values
    formatted to six decimals. A ``.txt`` file is written even when the XML
    contains no ``object`` element (it is then empty).

    Args:
        lut: Mapping from the text of an object's ``name`` tag to a class id.
            Names missing from the mapping are written with class ``-1`` and
            a warning is printed.
        xml_dir: Directory holding the ``.xml`` files. ``None`` (default)
            keeps the original behaviour of scanning the current working
            directory.
    """
    pattern = "*.xml" if xml_dir is None else os.path.join(xml_dir, "*.xml")
    for fname in glob.glob(pattern):
        xmldoc = minidom.parse(fname)
        fname_out = fname[:-4] + '.txt'

        size = xmldoc.getElementsByTagName('size')[0]
        width = int(_child_text(size, 'width'))
        height = int(_child_text(size, 'height'))

        label_lines = []
        for item in xmldoc.getElementsByTagName('object'):
            # Class label.
            classid = _child_text(item, 'name')
            if classid in lut:
                label_str = str(lut[classid])
            else:
                label_str = "-1"
                print("warning: label '%s' not in look-up table" % classid)

            # Bounding box, looked up once instead of once per coordinate.
            bndbox = item.getElementsByTagName('bndbox')[0]
            b = tuple(
                float(_child_text(bndbox, tag))
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            )
            bb = convert_coordinates((width, height), b)
            label_lines.append(
                label_str + " " + " ".join(["%.6f" % a for a in bb]) + '\n'
            )

        # The file is opened only after the XML has been read completely, so
        # a malformed annotation no longer leaves a truncated label file.
        with open(fname_out, "w") as f:
            f.writelines(label_lines)
        print("wrote %s" % fname_out)


def main():
    """Convert every XML file in the current working directory."""
    convert_xml2yolo(lut)


if __name__ == '__main__':
    main()

上面的代码按类别名称的首字母将交通标志归为三大类:i开头的为0,p开头的为1,w开头的为2。脚本默认处理当前工作目录下的所有xml文件,使用时注意修改文件夹路径。

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile

# Category names; the position in this list is the class id written to the
# label files. For a small custom dataset replace it, e.g. classes = ["ball"].
classes = [
    "i1", "i10", "i11", "i12", "i13", "i14", "i15", "i2", "i3", "i4", "i5",
    "il100", "il110", "il50", "il60", "il70", "il80", "il90", "io", "ip",
    "p1", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18",
    "p19", "p2", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27",
    "p28", "p3", "p4", "p5", "p6", "p7", "p8", "p9",
    "pa10", "pa12", "pa13", "pa14", "pa8", "pb", "pc", "pg",
    "ph1.5", "ph2", "ph2.1", "ph2.2", "ph2.4", "ph2.5", "ph2.8", "ph2.9",
    "ph3", "ph3.2", "ph3.5", "ph3.8", "ph4", "ph4.2", "ph4.3", "ph4.5",
    "ph4.8", "ph5", "ph5.3", "ph5.5",
    "pl10", "pl100", "pl110", "pl120", "pl15", "pl20", "pl25", "pl30",
    "pl35", "pl40", "pl5", "pl50", "pl60", "pl65", "pl70", "pl80", "pl90",
    "pm10", "pm13", "pm15", "pm1.5", "pm2", "pm20", "pm25", "pm30", "pm35",
    "pm40", "pm46", "pm5", "pm50", "pm55", "pm8",
    "pn", "pne", "po",
    "pr10", "pr100", "pr20", "pr30", "pr40", "pr45", "pr50", "pr60", "pr70",
    "pr80",
    "ps",
    "pw2", "pw2.5", "pw3", "pw3.2", "pw3.5", "pw4", "pw4.2", "pw4.5",
    "w1", "w10", "w12", "w13", "w16", "w18", "w20", "w21", "w22", "w24",
    "w28", "w3", "w30", "w31", "w32", "w34", "w35", "w37", "w38", "w41",
    "w42", "w43", "w44", "w45", "w46", "w47", "w48", "w49", "w5", "w50",
    "w55", "w56", "w57", "w58", "w59", "w60", "w62", "w63", "w66", "w8",
    "wo",
    "i6", "i7", "i8", "i9", "ilx",
    "p29",
    "w29", "w33", "w36", "w39", "w4", "w40", "w51", "w52", "w53", "w54",
    "w6", "w61", "w64", "w65", "w67", "w7", "w9",
    "pax", "pd", "pe", "phx", "plx", "pmx", "pnl", "prx", "pwx",
    "w11", "w14", "w15", "w17", "w19", "w2", "w23", "w25", "w26", "w27",
    "pl0", "pl4", "pl3", "pm2.5", "ph4.4", "pn40", "ph3.3", "ph2.6",
]

# Percentage (1-100) of annotated images that go to the training split.
TRAIN_RATIO = 80


def clear_hidden_files(path):
    """Recursively delete every file whose name starts with ``._``.

    Args:
        path: Directory to clean; sub-directories are visited as well.
    """
    base = os.path.abspath(path)
    for entry in os.listdir(path):
        abspath = os.path.join(base, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        else:
            clear_hidden_files(abspath)


def convert(size, box):
    """Convert a pixel box to normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels (note the x-first order).

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width or height respectively.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def convert_annotation(
    image_id,
    annotation_dir='E:/python/learning_Tsinghua/test_code/xmlLabel/train/',
    labels_dir='E:/python/learning_Tsinghua/test_code/txtLabel/train/',
):
    """Convert ``<annotation_dir>/<image_id>.xml`` into a txt label file.

    One line ``<class_id> <x> <y> <w> <h>`` is written per ``object`` element.
    Objects whose name is not in ``classes`` or whose ``difficult`` flag is 1
    are skipped.

    Args:
        image_id: File name of the image without extension.
        annotation_dir: Directory containing the XML file. Defaults to the
            path that was hard-coded in the original function.
        labels_dir: Directory that receives ``<image_id>.txt``. Defaults to
            the path that was hard-coded in the original function.
    """
    in_path = os.path.join(annotation_dir, '%s.xml' % image_id)
    out_path = os.path.join(labels_dir, '%s.txt' % image_id)

    root = ET.parse(in_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_lines = []
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (
            float(xmlbox.find('xmin').text),
            float(xmlbox.find('xmax').text),
            float(xmlbox.find('ymin').text),
            float(xmlbox.find('ymax').text),
        )
        bb = convert((w, h), b)
        label_lines.append(
            str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
        )

    # Written only after parsing succeeded; `with` closes the handle even if
    # the write fails.
    with open(out_path, 'w') as out_file:
        out_file.writelines(label_lines)


def _prepare_dir(path):
    """Create *path* (including parents) if needed, clean it and return it."""
    # os.mkdir failed whenever a parent such as "txtLabel/" did not exist.
    os.makedirs(path, exist_ok=True)
    clear_hidden_files(path)
    return path


def main():
    """Convert all annotations and split images/labels into train and val."""
    wd = os.getcwd()
    data_base_dir = os.path.join(wd, "E:/python/learning_Tsinghua/test_code/")
    os.makedirs(data_base_dir, exist_ok=True)

    annotation_dir = _prepare_dir(os.path.join(data_base_dir, "xmlLabel/train/"))
    image_dir = _prepare_dir(os.path.join(data_base_dir, "train/"))
    yolo_labels_dir = _prepare_dir(os.path.join(data_base_dir, "txtLabel/train/"))
    yolov5_images_dir = _prepare_dir(os.path.join(data_base_dir, "images/"))
    yolov5_labels_dir = _prepare_dir(os.path.join(data_base_dir, "labels/"))
    yolov5_images_train_dir = _prepare_dir(os.path.join(yolov5_images_dir, "train/"))
    yolov5_images_test_dir = _prepare_dir(os.path.join(yolov5_images_dir, "val/"))
    yolov5_labels_train_dir = _prepare_dir(os.path.join(yolov5_labels_dir, "train/"))
    yolov5_labels_test_dir = _prepare_dir(os.path.join(yolov5_labels_dir, "val/"))

    train_list_path = os.path.join(wd, "yolov5_train.txt")
    test_list_path = os.path.join(wd, "yolov5_val.txt")
    with open(train_list_path, 'w') as train_file, \
            open(test_list_path, 'w') as test_file:
        for img_name in os.listdir(image_dir):
            image_path = os.path.join(image_dir, img_name)
            # Skip sub-directories instead of falling through with variables
            # left over from the previous iteration.
            if not os.path.isfile(image_path):
                continue

            stem = os.path.splitext(img_name)[0]
            annotation_path = os.path.join(annotation_dir, stem + '.xml')
            label_name = stem + '.txt'
            label_path = os.path.join(yolo_labels_dir, label_name)

            prob = random.randint(1, 100)
            print("Probability: %d" % prob)

            # Images without an annotation file are left out of both splits.
            if not os.path.exists(annotation_path):
                continue

            # randint(1, 100) is inclusive on both ends, so `<=` is required
            # for exactly TRAIN_RATIO percent; `<` gave TRAIN_RATIO - 1.
            if prob <= TRAIN_RATIO:
                list_file = train_file
                images_dst = yolov5_images_train_dir
                labels_dst = yolov5_labels_train_dir
            else:
                list_file = test_file
                images_dst = yolov5_images_test_dir
                labels_dst = yolov5_labels_test_dir

            list_file.write(image_path + '\n')
            # Pass the same directories the driver uses so both always agree.
            convert_annotation(stem, annotation_dir, yolo_labels_dir)
            copyfile(image_path, os.path.join(images_dst, img_name))
            copyfile(label_path, os.path.join(labels_dst, label_name))


if __name__ == '__main__':
    main()

这个代码是我之前处理自己的数据集时使用的代码,使用时需要修改classes[ ]里的类别,适合类别数较少的情况。同时本代码会按照TRAIN_RATIO设定的比例(默认约80%)将数据集随机划分为训练集和验证集,并相应地划分其标签。
(使用时注意文件夹路径)

数据集json格式转化为xml格式、txt格式相关推荐

  1. yolo图像检测数据集格式转换:xml 与 txt格式相互转换

    格式介绍 一图流介绍的比较详细,一般图像检测数据集格式为txt或者xml格式,在使用labelimg进行标注的时候,可以设置获得不同格式的数据集,以满足不同算法训练格式要求: 一般建议使用pascal ...

  2. Unity 工具类 之 Excel 转换为 json、csv、xml、lua格式

    Unity 工具类 之 Excel 转换为 json.csv.xml.csv 格式 目录 Unity 工具类 之 Excel 转换为 json.csv.xml.csv 格式 一.介绍 二.操作原理 三 ...

  3. 从json提取数据,保存成txt格式

    前段时间有一个需求做文本语义匹配,但是公司的标注数据不够无监督学习效果不够好,只能使用开源的数据集.开源的数据集清洗成json格式,我们从json提取数据保存成txt格式方便后续的使用.JSON数据格 ...

  4. json报文转化为xml报文_JSON与XML互相转化(Jackson)

    选型: json-lib.jar可以做,但是太老了,抛弃: fastjson完全没有xml相关功能: staxon经测试不能用. 最终选用jackson来主要实现. JDK为1.8,如果低于1.8,则 ...

  5. Citypersons数据集转VOC标准格式(YOLO 目标检测txt格式)

    CItyscapes城市数据集包含一组不同的立体视频序列中记录来自50个不同城市的街景,高质量的进行像素级的注释.数据集下载地址(需要申请注册,通过申请才能下载)[https://www.citysc ...

  6. stl2xml.py 代码实现了stl格式转化为xml格式

    #!H:\Sublime_work\Python_work # -*- coding:utf-8 -*- #we can use cmd to control our python programm ...

  7. python将gml格式的图数据转为txt格式,点和边的字符值分别用数字替代

    # -*- coding: utf-8 -*- import networkx as nx import copy# 抽取gml中的数据 # networkx可以直接通过函数从gml文件中读出数据 d ...

  8. txt文本改html没有用,编辑html格式文本可改成txt格式(可以替换或更换某文本)新手...

    该楼层疑似违规已被系统折叠 隐藏此楼查看此楼 新的名字 生活助手 If Clock.Hour < 12 Then TextWindow.WriteLine("早上好,测试者" ...

  9. 文本怎么换成html,编辑html格式文本可改成txt格式(可以替换或更换某文本)新手...

    该楼层疑似违规已被系统折叠 隐藏此楼查看此楼 新的名字 生活助手 If Clock.Hour < 12 Then TextWindow.WriteLine("早上好,测试者" ...

最新文章

  1. 永久代内存java_Java8内存模型—永久代(PermGen)和元空间(Metaspace)
  2. 热电偶校验仪使用说明_热电偶冷端补偿方法
  3. 201521123078 《Java程序设计》第6周学习总结
  4. 七月算法--12月机器学习在线班-第五次课笔记—回归
  5. 转载:MySQL数据库INSERT、UPDATE、DELETE以及REPLACE语句的用法详解
  6. 大数据开发笔记(十):Hbase列存储数据库总结
  7. 巧用「打印」功能实现PDF单页提取
  8. Nginx源码分析 - 实战篇 - 编写一个自定义的模块(24)
  9. ExactScan pro for mac(扫描仪整合工具)
  10. SQLSERVER数据库所有者SID问题
  11. 课程思政与c语言程序设计,C语言程序设计课程思政教学改革教学设计.doc
  12. codejock(codejock chart类)
  13. 抓包工具Charles乱码解决办法
  14. 什么是UKey?Ukey在密评中的应用 双因素身份认证 安当加密
  15. Visual Assist X 10.6.1837完美破解版(带VS2010破解)
  16. 【模拟】1177.正方形
  17. pdf转json_如何显著缩小PDF文件大小?
  18. 一个小玩意 PHP实现微信红包金额拆分试玩
  19. 对 iOS 14.2 糟糕的音乐控制界面的思考
  20. 获取32位的uuid方法

热门文章

  1. 华为交换机、路由器命令合集
  2. excel去掉字符串两端括号
  3. GRASP----(职责分配原则)
  4. 查看linux iptables 配置文件,Linux iptables 配置详解
  5. (六十三)c#Winform自定义控件-箭头(工业)
  6. Linux 面试题 合集
  7. 制作一大片文字的水印图片
  8. 装nagios是php怎么配置文件,nagios安装配置
  9. 微信小程序 textarea浮动键盘弹不出来错误
  10. Dockerfile 中文参考文档