MTCNN-tensorflow源码解析-gen_12net

`prepare_data/gen_12net_data.py` 生成训练样本，用于训练 PNet.

# Generate 12x12 training crops for PNet from the WIDER FACE annotations.
#
# For every annotated image the script writes:
#   * 50 random negative crops (IoU < 0.3 with every ground-truth box),
#   * up to 5 extra negatives sampled near each face box,
#   * up to 20 crops around each face box, stored as positive (IoU >= 0.65)
#     or part-face (IoU >= 0.4) samples together with bbox-regression offsets.
#
# NOTE(review): this excerpt omits the script's imports. It relies on `os`,
# `cv2`, `np` (numpy), `npr` (presumably numpy.random) and an
# `IoU(box, boxes)` helper being in scope -- confirm against the full script.

anno_file = "wider_face_train.txt"          # image names plus face-box coordinates
im_dir = "../../DATA/WIDER_train/images"    # WIDER FACE training images
pos_save_dir = "../../DATA/12/positive"     # face crops
part_save_dir = "../../DATA/12/part"        # partial-face crops
neg_save_dir = '../../DATA/12/negative'     # non-face crops
save_dir = "../../DATA/12"

# makedirs also creates missing parent directories, which os.mkdir does not.
for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
    os.makedirs(directory, exist_ok=True)

with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)  # one annotation line per image
print("%d pics in total" % num)

p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # don't care (part face)
idx = 0
box_idx = 0

# The context manager closes the three label files even if a crop fails midway.
with open(os.path.join(save_dir, 'pos_12.txt'), 'w') as f1, \
        open(os.path.join(save_dir, 'neg_12.txt'), 'w') as f2, \
        open(os.path.join(save_dir, 'part_12.txt'), 'w') as f3:
    for annotation in annotations:
        # Each line: "<image name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]".
        annotation = annotation.strip().split(' ')
        im_path = annotation[0]  # e.g. 0--Parade/0_Parade_marchingband_1_849
        bbox = list(map(float, annotation[1:]))
        # One row per face box: (x_left, y_top, x_right, y_bottom).
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

        img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
        idx += 1
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print("skip %s: image could not be read" % im_path)
            continue
        height, width, channel = img.shape

        # Integer upper bound for negative crop sizes; randint needs low < high.
        max_neg_size = min(width, height) // 2
        if max_neg_size <= 12:
            print("skip %s: image too small to sample crops" % im_path)
            continue

        # 1) 50 random negatives per image.
        # NOTE: the loop retries until 50 crops pass the IoU test.
        neg_num = 0
        while neg_num < 50:
            size = npr.randint(12, max_neg_size)
            # Top-left corner chosen so the crop stays inside the image.
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])
            Iou = IoU(crop_box, boxes)
            # IoU with every ground-truth box must be below 0.3.
            if np.max(Iou) < 0.3:
                # Rows index y and columns index x.
                cropped_im = img[ny: ny + size, nx: nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write("../../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        for box in boxes:
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            # Ignore small faces and boxes whose top-left corner is outside the image.
            if max(w, h) < 20 or x1 < 0 or y1 < 0:
                continue

            # 2) Up to 5 negatives near the face box (IoU < 0.3).
            for i in range(5):
                size = npr.randint(12, max_neg_size)
                # Offsets relative to (x1, y1); the lower bound -x1 / -y1 keeps
                # the shifted corner from going negative.
                delta_x = npr.randint(int(max(-size, -x1)), int(w))
                delta_y = npr.randint(int(max(-size, -y1)), int(h))
                nx1 = int(max(0, x1 + delta_x))
                ny1 = int(max(0, y1 + delta_y))
                # Drop the crop when its bottom-right corner leaves the image,
                # so fewer than 5 samples may be produced.
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = IoU(crop_box, boxes)
                if np.max(Iou) < 0.3:
                    cropped_im = img[ny1: ny1 + size, nx1: nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("../../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # 3) Up to 20 positive / part-face crops around the face box.
            # Boxes with a side below 5 px are skipped: the +-20% offset range
            # would be empty for them.
            if min(w, h) >= 5:
                for i in range(20):
                    # Crop size in [0.8 * min side, 1.25 * max side).
                    size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))
                    # Offset of the crop centre from the face-box centre.
                    delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                    delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                    # (x1 + w / 2, y1 + h / 2) is the face centre; shift it, then
                    # step back half a crop to get the top-left corner (clamped to 0).
                    nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                    ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                    nx2 = nx1 + size
                    ny2 = ny1 + size
                    if nx2 > width or ny2 > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    # Offsets of the ground-truth corners from the crop corners,
                    # normalised by crop size (bbox-regression targets).
                    offset_x1 = (x1 - nx1) / float(size)
                    offset_y1 = (y1 - ny1) / float(size)
                    offset_x2 = (x2 - nx2) / float(size)
                    offset_y2 = (y2 - ny2) / float(size)

                    cropped_im = img[ny1: ny2, nx1: nx2, :]
                    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)

                    box_ = box.reshape(1, -1)
                    iou = IoU(crop_box, box_)
                    if iou >= 0.65:
                        # Positive sample, label 1.
                        save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                        f1.write("../../DATA/12/positive/%s.jpg" % p_idx + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif iou >= 0.4:
                        # Part-face sample, label -1.
                        save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                        f3.write("../../DATA/12/part/%s.jpg" % d_idx + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
            box_idx += 1

        # NOTE(review): the collapsed source does not show which loop level this
        # progress report belonged to; it is placed at the per-image level here.
        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))

注解：

1、 ./ 表示当前目录，../ 表示父级目录，/ 表示根目录。在下面3个路径下新建文件夹，分别存放人脸、部分人脸、非人脸图像。

pos_save_dir = "../../DATA/12/positive"
part_save_dir = "../../DATA/12/part"
neg_save_dir = '../../DATA/12/negative'
save_dir = "../../DATA/12"

2、os.path.join()函数用于拼接文件路径，os.path.join(path1[,path2[,……]])返回值：将多个路径组合后返回。使用细节参考：https://blog.csdn.net/fu6543210/article/details/80032895

f1 = open(os.path.join(save_dir, 'pos_12.txt'), 'w')

其中 os.path.join(save_dir, 'pos_12.txt') 的返回值为 "../../DATA/12\\pos_12.txt"（Windows 下的拼接结果），f1 是以写模式打开该路径得到的文件对象。

3、一次性读取整个文件(直到结束符 EOF)；自动将文件内容分析成一个行的列表，该列表可以由 Python 的 for... in ... 结构进行处理。例如：

file = open('兼职模特联系方式.txt', 'r')
a = file.readlines()
>>> a ['吴迪 177 70 13888888\n', '王思 170 50 13988888\n', '白雪 167 48 13324434\n', '黄蓉 166 46 13828382']

本文件中anno_file（即wider_face_train.txt）存放的是每张图片的图片名及其中标记的人脸框位置，每个人脸框用四个参数表示。num代表TXT文件中的行数，即图像总数。from：https://www.cnblogs.com/xiugeng/p/8635862.html

with open(anno_file, 'r') as f:annotations = f.readlines()
num = len(annotations)

4、strip() 方法用于移除字符串头尾指定的字符（默认为空格），返回移除字符串头尾指定的字符生成的新字符串；
split()通过指定分隔符对字符串进行切片，返回分割后的字符串列表。

annotation = annotation.strip().split(' ')
im_path = annotation[0]  #例如：0--Parade/0_Parade_marchingband_1_849

例如：annotation="0--Parade/0_Parade_marchingband_1_849 448.51 329.63 570.09 478.23"，是一个字符串，经过上述代码处理后变成字符串列表：annotation=['0--Parade/0_Parade_marchingband_1_849', '448.51', '329.63', '570.09', '478.23']

5、map()是 Python 内置的高阶函数，它接收一个函数 f 和一个可迭代对象（如 list），把函数 f 依次作用在每个元素上。在 Python 2 中直接返回一个新的 list；在 Python 3 中返回的是迭代器，因此需要用 list() 转换。下式将字符串 '448.51'、'329.63'、'570.09'、'478.23' 转化为 float 类型。

bbox = list(map(float, annotation[1:]))
boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)  #转化为n行4列数组

6、从读取的一幅图片中截取50张非人脸图片。

size=randint(12, min(width, height) / 2)，在指定范围内随机生成一个整数，作为截取区域的边长。因为网络输入是12*12，所以截取尺寸不小于12。
nx,ny：随机生成截取矩形区域的左上角坐标。
crop_box=[nx, ny, nx + size, ny + size]，用左上角和右下角两个角点的坐标表示一个矩形框。
resized_im：从原图中提取矩形框区域，并resize成12*12
对于iou<0.3的矩形区域，我们设置为非人脸样本。
f2.write("../../DATA/12/negative/%s.jpg"%n_idx + ' 0\n') #将非人脸样本的位置信息，写入TXT文件中

   # Keep cropping random squares until 50 negatives have been saved for this image.
   while neg_num < 50:
       # Side length of the square crop, drawn from [12, min(width, height) / 2).
       size = npr.randint(12, min(width, height) / 2)
       # Top-left corner, chosen so the crop stays inside the image.
       nx = npr.randint(0, width - size)
       ny = npr.randint(0, height - size)
       # Random crop as (x_left, y_top, x_right, y_bottom).
       crop_box = np.array([nx, ny, nx + size, ny + size])
       # IoU of the crop with every ground-truth box.
       Iou = IoU(crop_box, boxes)
       # Crop from the original image; rows index y and columns index x.
       cropped_im = img[ny : ny + size, nx : nx + size, :]
       resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
       if np.max(Iou) < 0.3:
           # IoU with all ground-truth boxes must be below 0.3.
           save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
           f2.write("../../DATA/12/negative/%s.jpg"%n_idx + ' 0\n')
           cv2.imwrite(save_file, resized_im)
           n_idx += 1
           neg_num += 1

7、boxes是TXT文件中保存的矩形框坐标，采用两个角点坐标 (x_left, y_top, x_right, y_bottom) 表示矩形框。对一幅图片中的矩形框进行遍历，对每一个矩形框，在其附近尝试截取5次，生成最多5个非人脸样本（越界或IoU不满足条件的截取会被丢弃）。

    for box in boxes:
        # box is (x_left, y_top, x_right, y_bottom).
        x1, y1, x2, y2 = box
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        # Ignore small faces and boxes whose top-left corner is outside the image.
        if max(w, h) < 20 or x1 < 0 or y1 < 0:
            continue
        # Try 5 more crops near the bounding box; per the full script, those
        # with IoU below 0.3 are kept as negatives.
        for i in range(5):
            # Side length of the square to crop.
            size = npr.randint(12, min(width, height) / 2)
            # delta_x / delta_y are offsets relative to (x1, y1); the lower
            # bound -x1 / -y1 keeps x1 + delta_x and y1 + delta_y non-negative.
            delta_x = npr.randint(max(-size, -x1), w)
            delta_y = npr.randint(max(-size, -y1), h)
            # The max() here is not strictly needed given the bounds above.
            nx1 = int(max(0, x1 + delta_x))
            ny1 = int(max(0, y1 + delta_y))
            # Skip this attempt when the bottom-right corner falls outside the
            # image, so the loop does not always yield 5 crops.
            if nx1 + size > width or ny1 + size > height:
                continue

8、在人脸矩形框附近生成人脸训练数据和部分人脸数据

w<5的人脸框太小，不适用
nx1,ny1,nx2,ny2:截取的矩形框的坐标
offset_x1 = (x1 - nx1) / float(size)：表示我们得到的训练人脸框样本与真实人脸框的偏移量
计算选取的框与真实人脸框的iou,根据阈值，将矩形框划分为人脸框和部分人脸框。

        for i in range(20):# pos and part face size [minsize*0.8,maxsize*1.25]size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) # ceil() 函数返回数字的上入整数# delta here is the offset of box centerif w<5:#  print (w)continue#print (box)delta_x = npr.randint(-w * 0.2, w * 0.2)delta_y = npr.randint(-h * 0.2, h * 0.2)#show this way: nx1 = max(x1+w/2-size/2+delta_x)# x1+ w/2 is the central point, then add offset , then deduct size/2# deduct size/2 to make sure that the right bottom corner will be out ofnx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))nx2 = nx1 + sizeny2 = ny1 + sizeif nx2 > width or ny2 > height:continue crop_box = np.array([nx1, ny1, nx2, ny2])#yu gt de offsetoffset_x1 = (x1 - nx1) / float(size)offset_y1 = (y1 - ny1) / float(size)offset_x2 = (x2 - nx2) / float(size)offset_y2 = (y2 - ny2) / float(size)#cropcropped_im = img[ny1 : ny2, nx1 : nx2, :]resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)box_ = box.reshape(1, -1)iou = IoU(crop_box, box_)if iou  >= 0.65:save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)f1.write("../../DATA/12/positive/%s.jpg"%p_idx + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))cv2.imwrite(save_file, resized_im)p_idx += 1elif iou >= 0.4:save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx)f3.write("../../DATA/12/part/%s.jpg"%d_idx + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))cv2.imwrite(save_file, resized_im)d_idx += 1box_idx += 1if idx % 100 == 0:print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))

总结：

从wider_face_train.txt文件中读取图片名和图像中人脸框坐标，根据图像名读取我们下载的数据集（人脸检测部分，没有特征点）"../../DATA/WIDER_train/images"中的图像。对于每一张图像，随机产生50个负样本；对图像中的每个人脸框，在其附近再产生最多5个负样本，并随机截取最多20个区域，根据iou的大小划分为正样本或部分人脸样本（iou小于0.4的丢弃）。截取的矩形区域分别存入

pos_save_dir = "../../DATA/12/positive"
part_save_dir = "../../DATA/12/part"
neg_save_dir = '../../DATA/12/negative'

图像的信息分别存入

f1 = open(os.path.join(save_dir, 'pos_12.txt'), 'w')  #将多个路径组合后返回
f2 = open(os.path.join(save_dir, 'neg_12.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_12.txt'), 'w')

pos_12.txt内的存储形式（图像路径+label（正样本为1）+偏移量）："../../DATA/12/positive/%s.jpg"%p_idx + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2)

neg_12.txt内的存储形式（图像路径+label（负样本为0））： "../../DATA/12/negative/%s.jpg"%n_idx + ' 0\n'

part_12.txt的存储形式同pos_12.txt：（图像路径+label（部分人脸为-1）+偏移量）

其中：

MTCNN-tensorflow源码解析-gen_12net_data.py相关推荐

Tensorflow源码解析1 -- 内核架构和源码结构
1 主流深度学习框架对比当今的软件开发基本都是分层化和模块化的,应用层开发会基于框架层.比如开发Linux Driver会基于Linux kernel,开发Android app会基于Android ...
pytorch YoLOV3 源码解析 train.py
train.py 总体分为三部分(不算import 库) 初始的一些设定 + train函数 + main函数源码地址: https://github.com/ultralytics/yolov3 ...
MTCNN-tensorflow源码解析-gen_landmark_aug_12.py；gen_imglist_pnet.py
gen_landmark_aug_12.py生成用于PNet网络的训练数据(用于人脸特征点).此外对于RNet,ONet(用于人脸特征点)的训练数据生成与其类似,不再赘述. 主函数: if __nam ...
Tensorflow源码解析5 -- 图的边 - Tensor
1 概述前文两篇文章分别讲解了TensorFlow核心对象Graph,和Graph的节点Operation.Graph另外一大成员,即为其边Tensor.边用来表示计算的数据,它经过上游节点计算后得 ...
Tensorflow源码解析2 -- 前后端连接的桥梁 - Session
1 Session概述 Session是TensorFlow前后端连接的桥梁.用户利用session使得client能够与master的执行引擎建立连接,并通过session.run()来触发一次计算 ...
Tensorflow源码解析3 -- TensorFlow核心对象 - Graph
1 Graph概述计算图Graph是TensorFlow的核心对象,TensorFlow的运行流程基本都是围绕它进行的.包括图的构建.传递.剪枝.按worker分裂.按设备二次分裂.执行.注销等.因 ...
Tensorflow源码解析3 -- TensorFlow核心对象 - Graph 1
1 Graph概述计算图Graph是TensorFlow的核心对象,TensorFlow的运行流程基本都是围绕它进行的.包括图的构建.传递.剪枝.按worker分裂.按设备二次分裂.执行.注销等.因 ...
Tensorflow源码解析2 -- 前后端连接的桥梁 - Session 1
1 Session概述 Session是TensorFlow前后端连接的桥梁.用户利用session使得client能够与master的执行引擎建立连接,并通过session.run()来触发一次计算 ...
Tensorflow源码解析6 -- TensorFlow本地运行时
1 概述 TensorFlow后端分为四层,运行时层.计算层.通信层.设备层.运行时作为第一层,实现了session管理.graph管理等很多重要的逻辑,是十分关键的一层.根据任务分布的不同,运行时又 ...

MTCNN-tensorflow源码解析-gen_12net_data.py

`prepare_data/gen_12net_data.py` 生成训练样本，用于训练 PNet.

总结：

MTCNN-tensorflow源码解析-gen_12net_data.py相关推荐

最新文章

热门文章

MTCNN-tensorflow源码解析-gen_12net_data.py

prepare_data/gen_12net_data.py 生成训练样本，用于训练 PNet.

总结：

MTCNN-tensorflow源码解析-gen_12net_data.py相关推荐

最新文章

热门文章

`prepare_data/gen_12net_data.py` 生成训练样本，用于训练 PNet.