insight face数据打包（一）：LFW

LFW数据集->rec文件

数据集准备：
- raw data下载：（提供两种来源）
  - http://vis-www.cs.umass.edu/lfw/index.html#download
- 已经对齐好的bin文件：https://download.csdn.net/download/ustczhng2012/15002984
数据对齐
- 基于原图+landmark点对齐图片
- LFW直接下载的raw data没有landmark点，一般需要landmark模型生成关键点，然后旋转缩放对齐后再生成bin文件

import cv2
import numpy as np
from skimage import transform as trans

# Five-point landmark templates for a 112x112 aligned crop, one per head pose.
# src3 is the frontal template: its x-coordinates are mirror-symmetric about
# x = 56 (39.730 + 72.270 = 112, 42.463 + 69.537 = 112).
# NOTE(review): the labels that accompanied these arrays ("<--left",
# "---frontal", "-->right", "-->right profile") look like separators placed
# BETWEEN consecutive templates, which would make the order
# src1 = left profile, src2 = left, src3 = frontal, src4 = right,
# src5 = right profile -- confirm against the upstream InsightFace source.
src1 = np.array(
    [[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
     [51.157, 89.050], [57.025, 89.702]],
    dtype=np.float32)
src2 = np.array(
    [[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
     [45.177, 86.190], [64.246, 86.758]],
    dtype=np.float32)
src3 = np.array(
    [[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
     [42.463, 87.010], [69.537, 87.010]],
    dtype=np.float32)
src4 = np.array(
    [[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
     [48.167, 86.758], [67.236, 86.190]],
    dtype=np.float32)
src5 = np.array(
    [[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
     [55.388, 89.702], [61.257, 89.050]],
    dtype=np.float32)

# Stack of all pose templates, shape (5, 5, 2).
src = np.array([src1, src2, src3, src4, src5])
# Templates keyed by output image size; the 224 entry is the 112 one scaled x2.
src_map = {112: src, 224: src * 2}

# Single-template variant used by mode='arcface'; expand_dims gives it a
# leading axis of length 1 so it can be iterated like ``src``.
arcface_src = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)
arcface_src = np.expand_dims(arcface_src, axis=0)

# lmk is prediction; src is template
def estimate_norm(lmk, image_size=112, mode='arcface'):
    """Find the similarity transform that best maps ``lmk`` onto a template.

    Every candidate template is tried; the one with the smallest summed
    Euclidean distance between the transformed landmarks and the template
    points wins.

    Args:
        lmk: Predicted landmarks, an array of shape (5, 2).
        image_size: Side length of the aligned output. Must be 112 when
            ``mode == 'arcface'``; otherwise it must be a key of ``src_map``.
        mode: ``'arcface'`` selects the single ``arcface_src`` template; any
            other value selects the multi-pose templates in ``src_map``.

    Returns:
        ``(min_M, min_index)``: the first two rows of the best transform's
        parameter matrix and the index of the template it was fitted to.
        Both stay empty lists if no template produced a finite error.
    """
    assert lmk.shape == (5, 2)
    tform = trans.SimilarityTransform()
    # Homogeneous coordinates (x, y, 1) so the 2x3 matrix can be applied
    # with a single dot product.
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    min_M = []
    min_index = []
    min_error = float('inf')
    if mode == 'arcface':
        assert image_size == 112
        src = arcface_src
    else:
        src = src_map[image_size]
    for i in range(src.shape[0]):
        tform.estimate(lmk, src[i])
        M = tform.params[0:2, :]
        results = np.dot(M, lmk_tran.T).T
        # Sum over the five points of the per-point Euclidean distance.
        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))
        if error < min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index


def norm_crop(img, landmark, image_size=112, mode='arcface'):
    """Warp ``img`` so that ``landmark`` lines up with the chosen template.

    Args:
        img: Source image passed to ``cv2.warpAffine``.
        landmark: Landmarks of shape (5, 2), forwarded to ``estimate_norm``.
        image_size: Side length of the square output image.
        mode: Template selection mode, forwarded to ``estimate_norm``.

    Returns:
        The warped ``image_size`` x ``image_size`` image; pixels outside the
        source are filled with 0.
    """
    M, _ = estimate_norm(landmark, image_size, mode)
    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return warped

生成rec和idx文件
- 可以参考emore.rec生成方法，使用该方法不需要上述数据对齐脚本，里面自带对齐脚本，只需要配置好图片配置txt文件即可
创建pair文件
- 创建pairs.txt脚本：（参考链接）

import glob
import os.path
import numpy as np
import os

# Number of matched and of unmatched pairs to generate (10 folds x 300 pairs).
_PAIRS_PER_KIND = 3000


def find_not_zero_pos(sstr):
    """Return the index of the first character of ``sstr`` that is not "0".

    When every character is "0" the index of the last character is returned
    (so one digit survives stripping); for an empty string that is -1.
    """
    for i, char in enumerate(sstr):
        if char != "0":
            return i
    return len(sstr) - 1


def get_real_str(sstr):
    """Strip leading zeros from ``sstr``, keeping a single "0" for all-zero input."""
    return sstr[find_not_zero_pos(sstr):]


def _raw_index(file_path):
    """Return the text between the last "_" of the file name and its first "."."""
    return os.path.basename(file_path).split("_")[-1].split(".")[0]


def _image_index(file_path):
    """Return the image number of ``file_path`` without leading zeros."""
    return get_real_str(_raw_index(file_path))


def _class_names():
    """Return the base name of every directory below INPUT_DATA (root excluded)."""
    # os.walk is top-down, so its first entry is INPUT_DATA itself.
    return [os.path.basename(entry[0]) for entry in os.walk(INPUT_DATA)][1:]


def _list_class_images(class_name):
    """Return the numbered ``*.jpg`` files of one class directory.

    Files whose trailing ``_<index>`` part is not purely numeric are skipped:
    the bin-generation step writes ``*_resize.jpg`` copies next to the
    originals, and those would otherwise yield a bogus index such as "resize".
    """
    pattern = os.path.join(INPUT_DATA, class_name, '*.' + 'jpg')
    return [path for path in glob.glob(pattern) if _raw_index(path).isdigit()]


def create_match_content():
    """Build at least 3000 unique same-person pairs.

    Each entry is ``"<name> <index1> <index2>"`` where both indices refer to
    different images of the same class directory under ``INPUT_DATA``.

    Returns:
        ``(matched_result, k)``: the set of pair strings and the number of
        pairs drawn (duplicates included).

    Raises:
        ValueError: if the dataset cannot provide enough distinct pairs
            (previously this case looped forever).
    """
    matched_result = set()
    k = 0
    # Only classes with two or more images can provide a matched pair; the
    # directory listing is read once instead of on every sweep.
    candidates = []
    for name in _class_names():
        files = _list_class_images(name)
        if len(files) >= 2:
            candidates.append((name, files))
    capacity = sum(len(files) * (len(files) - 1) for _, files in candidates)
    if capacity < _PAIRS_PER_KIND:
        raise ValueError(
            'dataset %r can provide at most %d matched pairs, %d required'
            % (INPUT_DATA, capacity, _PAIRS_PER_KIND))
    while len(matched_result) < _PAIRS_PER_KIND:
        for label_name, file_list in candidates:
            length = len(file_list)
            first = np.random.randint(length)
            second = np.random.randint(length)
            while second == first:
                second = np.random.randint(length)
            matched_result.add(' '.join((label_name,
                                         _image_index(file_list[first]),
                                         _image_index(file_list[second]))))
            k = k + 1
    return matched_result, k


def create_unmatch_content():
    """Build 3000 unique pairs of images that belong to different classes.

    Each entry is ``"<name2> <index2> <name1> <index1>"``.

    Returns:
        ``(unmatched_result, k)``: the set of pair strings and the number of
        candidate pairs generated (duplicates included).

    Raises:
        ValueError: if there are fewer than two class directories, or a full
            sweep over the dataset yields no new pair (previously both cases
            looped forever).
    """
    unmatched_result = set()
    k = 0
    class_names = _class_names()
    class_count = len(class_names)
    if class_count < 2:
        raise ValueError(
            'need at least two class directories under %r' % (INPUT_DATA,))
    # Directory listings do not change while we run, so read each one once.
    files_by_class = [_list_class_images(name) for name in class_names]
    while len(unmatched_result) < _PAIRS_PER_KIND:
        size_before = len(unmatched_result)
        # j selects the j-th image (modulo the class size) of both classes.
        for j in range(24):
            for i in range(class_count):
                # Pick a different class. The original test
                # ``random_number == 0 | random_number == i`` parsed as a
                # chained comparison and never excluded the root directory.
                other = np.random.randint(class_count)
                while other == i:
                    other = np.random.randint(class_count)
                file_list1 = files_by_class[i]
                file_list2 = files_by_class[other]
                if not file_list1 or not file_list2:
                    continue
                base_name1 = _image_index(file_list1[j % len(file_list1)])
                base_name2 = _image_index(file_list2[j % len(file_list2)])
                unmatched_result.add(' '.join((class_names[other], base_name2,
                                               class_names[i], base_name1)))
                k = k + 1
                # Leave every loop as soon as the quota is met, instead of
                # only the innermost one.
                if len(unmatched_result) >= _PAIRS_PER_KIND:
                    return unmatched_result, k
        if len(unmatched_result) == size_before:
            raise ValueError(
                'dataset %r is too small to provide %d unmatched pairs'
                % (INPUT_DATA, _PAIRS_PER_KIND))
    return unmatched_result, k


if __name__ == '__main__':
    INPUT_DATA = r'dataset/val/lfw-deepfunneled'
    txt_path = 'dataset/val/pairs.txt'
    if os.path.isfile(txt_path):
        os.remove(txt_path)
    result, k1 = create_match_content()
    print(k1)
    result_un, k2 = create_unmatch_content()
    print(k2)
    result1 = list(result)
    result2 = list(result_un)
    with open(txt_path, 'w') as f:
        # Header: number of folds and pairs per fold, space separated.
        f.write('10 300\n')
        # Each fold holds 300 matched pairs followed by 300 unmatched pairs.
        for i in range(10):
            for pair in result1[i * 300:i * 300 + 300]:
                f.write(pair + '\n')
            for pair in result2[i * 300:i * 300 + 300]:
                f.write(pair + '\n')

生成验证集bin文件
- 注意：生成的bin文件数据是BGR格式的，在使用时需要注意

import pickle
import os
from tqdm import tqdm
import cv2
def _encode_resized(lfw_dir, person, index, image_size):
    """Resize one LFW image and return the bytes of the resized JPEG.

    The source is ``<lfw_dir>/<person>/<person>_<index zero-padded to 4>.jpg``.
    The resized copy is written next to it with a ``_resize`` suffix and then
    read back as raw bytes.

    Raises:
        FileNotFoundError: if OpenCV cannot read the source image
            (``cv2.imread`` returns ``None`` instead of raising).
    """
    stem = person + '_' + index.zfill(4)
    img_path = os.path.join(lfw_dir, person, stem + '.jpg')
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError('cannot read image: %s' % img_path)
    img = cv2.resize(img, image_size)
    img_save_path = os.path.join(lfw_dir, person, stem + '_resize.jpg')
    cv2.imwrite(img_save_path, img)
    with open(img_save_path, 'rb') as fin:
        return fin.read()


if __name__ == '__main__':
    lfw_dir = r'dataset/val/lfw-deepfunneled'
    image_size = (112, 112)
    pairs_path = r'dataset/val/pairs.txt'
    bin_output = r'dataset/val/lfw.bin'
    lfw_bins = []
    issame_list = []
    with open(pairs_path, 'r') as f:
        lines = f.readlines()
    # lines[0] is the header line, so pairs start at index 1.
    for i in tqdm(range(1, len(lines))):
        temp = lines[i].strip().split(' ')
        if len(temp) == 3:
            # "<name> <index1> <index2>": two images of the same person.
            first = (temp[0], temp[1])
            second = (temp[0], temp[2])
            issame = 1
        elif len(temp) == 4:
            # "<name1> <index1> <name2> <index2>": two different people.
            first = (temp[0], temp[1])
            second = (temp[2], temp[3])
            issame = 0
        else:
            print("drop this line: %d" % i)
            continue
        lfw_bins.append(_encode_resized(lfw_dir, first[0], first[1], image_size))
        lfw_bins.append(_encode_resized(lfw_dir, second[0], second[1], image_size))
        issame_list.append(issame)
    # The bin file is a pickled (list of JPEG byte strings, list of 0/1 labels)
    # tuple, two images per label.
    with open(bin_output, 'wb') as f:
        pickle.dump((lfw_bins, issame_list), f,
                    protocol=pickle.HIGHEST_PROTOCOL)

insight face数据打包（一）：LFW相关推荐

Lua学习教程之可变參数数据打包与解包
利用table的pack与unpack进行数据打包与解包.測试代码例如以下: print("Test table.pack()----------------");function ...
狂雨小说内核26W数据打包带7条可用热门站采集规则
安装教程只需要上传网站源码和数据包到服务器空间,恢复数据库,修改数据库配置文件为自己数据库名字和密码,进后台修改域名即可上线. 带阅读听书插件,单本小说采集拆件导入数据库修改数据库配置文件/www ...
Python 串口数据打包与解析
Python 串口数据打包与解析介绍从字符串到字节流对字节流的解析介绍通常用python写好上位机要与板子通过串口通信的时候(比如命令,参数等),对于这之间的数据格式转换是有特定要求的,比如 ...
Java做rtp解包封包_基于RTP的H视频数据打包解包类DoubleLi博客园.pdf
基于RTP的H视频数据打包解包类DoubleLi博客园 15- 10-30 基于RTP的H264视频数据打包解包类 - DoubleLi - 博客园 DoubleLi 博客园 :: 首页 :: 博问 ...
44项地级市所有数据打包
下载链接:44项地级市数据打包.docx-数据集文档类资源-CSDN下载包含以下44项数据: [数据集]2000-2019年中国地级市人均GDP https://bbs.pinggu.org/thr ...
安防视频监控系统视频上云解决方案EasyCVR语音转发功能音频数据打包发送流程介绍
目前我们的视频上云服务平台EasyCVR已经可集成海康EHome私有协议,并且在前文中我也跟大家讲过EHome协议的配置和调用流程,有兴趣的可以阅读一下:配置及协议介绍.Ehome协议调用流程介绍. ...
MXNet图像数据打包 im2rec.py
记录一下使用MXNet进行图像数据打包时踩过的坑.我使用的MXNet是最新的1.2.0版本,而参考资料中给的基本都是老版本的打包方式,所以过程中出了很多错误,无奈只能看im2rec.py源码,最终完美 ...
基于Dedup的数据打包技术
基于Dedup的数据打包技术作者简介 :刘爱贵,研究方向为网络存储.数据挖掘和分布式计算:毕业于中科院,目前从事存储软件研发工作. Email: Aigui.Liu@gmail.com 注: 作者学 ...
emlog某涛娱乐资源网源码自带200多条数据打包
介绍: 源码说明: 某涛娱乐网源码自带200多数据打包数据可做流量站广告联盟等资源平台源码安装环境:php+mysql 1.上传源码到网站根目录 2.导入数据库到sjk.sql 3.修改数据库配置文件 ...
【FFMPEG】基于RTP的H264视频数据打包解包类
最近考虑使用RTP替换原有的高清视频传输协议,遂上网查找有关H264视频RTP打包.解包的文档和代码.功夫不负有心人,找到不少有价值的文档和代码.参考这些资料,写了H264 RTP打包类.解包类,实现 ...

insight face数据打包（一）：LFW

LFW数据集->rec文件

insight face数据打包（一）：LFW相关推荐

最新文章

热门文章