目录

调用数据集

生成train.lst

生成train.rec train.idx


h5py还是比较方便的,推荐使用:

https://blog.csdn.net/jacke121/article/details/119935657

调用数据集

import mxnet as mx

# NOTE(review): this excerpt also uses `os`, `numbers`, `np`, `torch`, `Dataset`
# and `transforms` without importing them. Presumably they are numpy, PyTorch's
# torch.utils.data.Dataset and torchvision.transforms -- confirm and import them
# in the enclosing module.


class MXFaceDataset(Dataset):
    """Dataset that serves (image, label) pairs from an MXNet indexed record file.

    The records are read from ``train.rec`` / ``train.idx`` located in
    ``root_dir``.
    """

    def __init__(self, root_dir, local_rank):
        """Open the record file and build the list of record keys to serve.

        Parameters
        ----------
        root_dir: string
            Folder that contains ``train.rec`` and ``train.idx``.
        local_rank: int
            Stored on the instance; not used elsewhere in this class.
        """
        super(MXFaceDataset, self).__init__()
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        self.root_dir = root_dir
        self.local_rank = local_rank
        path_imgrec = os.path.join(root_dir, 'train.rec')
        path_imgidx = os.path.join(root_dir, 'train.idx')
        self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
        # Record 0 decides how the key list is built.
        s = self.imgrec.read_idx(0)
        header, _ = mx.recordio.unpack(s)
        if header.flag > 0:
            # Record 0 carries a multi-value label; samples are keys
            # 1 .. label[0] - 1.
            self.header0 = (int(header.label[0]), int(header.label[1]))
            self.imgidx = np.array(range(1, int(header.label[0])))
        else:
            # No such header record: every key in the index is a sample.
            self.imgidx = np.array(list(self.imgrec.keys))

    def __getitem__(self, index):
        """Return the transformed image and its label (long tensor) at ``index``."""
        idx = self.imgidx[index]
        s = self.imgrec.read_idx(idx)
        header, img = mx.recordio.unpack(s)
        label = header.label
        if not isinstance(label, numbers.Number):
            # Array-like label: only the first entry is used.
            label = label[0]
        label = torch.tensor(label, dtype=torch.long)
        sample = mx.image.imdecode(img).asnumpy()
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label

    def __len__(self):
        """Return the number of record keys served by this dataset."""
        return len(self.imgidx)

生成train.lst

import argparse
import glob
import os

import cv2
import mxnet as mx
import numpy as np
def get_id(path_f=r"G:\data\5w", test_lst=r'G:\data\train_5w/train.lst'):
    """Write an im2rec-style ``.lst`` file for a folder-per-identity image tree.

    Each output line is ``index \\t face_id \\t image_path``. ``face_id`` is the
    name of the image's parent folder converted to ``int``.

    Parameters
    ----------
    path_f: string
        Root folder; images are matched by ``<path_f>/*/*.jpg``.
    test_lst: string
        Path of the ``.lst`` file to write.

    Raises
    ------
    ValueError
        If a parent folder name is not an integer.
    """
    # Sorted so that the generated list is reproducible between runs.
    files = sorted(glob.glob(path_f + "/*/*.jpg", recursive=True))  # find file
    with open(test_lst, 'w') as fw:
        for index, idx in enumerate(files):
            face_id = int(os.path.basename(os.path.dirname(idx)))
            # The first column is used as the record key when the .rec/.idx
            # pair is built, so it must be unique per image. Starting at 0
            # keeps key 0 present for readers that look it up first.
            fw.write(f'{index}\t{face_id}\t{idx}\n')


if __name__ == '__main__':
    get_id()

生成train.rec train.idx

im2rec.py

主要设置两个参数:

    parser.add_argument('--prefix', help='prefix of input/output lst and rec files.',default=r"G:\data\train_5w\train.lst")
    parser.add_argument('--root', help='path to folder containing images.',default=r"G:\data\5w")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
import sys

curr_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(curr_path, "../python"))
import mxnet as mx
import random
import argparse
import cv2
import time
import traceback

try:
    import multiprocessing
except ImportError:
    multiprocessing = None


def list_image(root, recursive, exts):
    """Traverse ``root`` and yield one entry per image file found.

    Parameters
    ----------
    root: string
        Folder to scan.
    recursive: bool
        If true, walk sub-folders and give every folder that contains images
        its own integer label (in discovery order); the folder-to-label
        mapping is printed once the walk is finished. Otherwise only files
        directly in ``root`` are listed, all with label 0.
    exts: list of string
        Accepted lower-case file extensions, including the leading dot.

    Returns
    -------
    iterator of ``(index, path_relative_to_root, label)`` tuples
    """
    i = 0
    if recursive:
        cat = {}
        for path, dirs, files in os.walk(root, followlinks=True):
            # Sorting in place makes the walk order, and so the labels,
            # deterministic.
            dirs.sort()
            files.sort()
            for fname in files:
                fpath = os.path.join(path, fname)
                suffix = os.path.splitext(fname)[1].lower()
                if os.path.isfile(fpath) and (suffix in exts):
                    if path not in cat:
                        cat[path] = len(cat)
                    yield (i, os.path.relpath(fpath, root), cat[path])
                    i += 1
        for k, v in sorted(cat.items(), key=lambda x: x[1]):
            print(os.path.relpath(k, root), v)
    else:
        for fname in sorted(os.listdir(root)):
            fpath = os.path.join(root, fname)
            suffix = os.path.splitext(fname)[1].lower()
            if os.path.isfile(fpath) and (suffix in exts):
                yield (i, os.path.relpath(fpath, root), 0)
                i += 1


def write_list(path_out, image_list):
    """Helper function to write an image list into a file.

    The format of each line is
    ``integer_image_index \\t float_label_index \\t path_to_image``
    with one float column per label value.

    Parameters
    ----------
    path_out: string
        File to write.
    image_list: list
        Items shaped ``(index, path, label, ...)``.
    """
    with open(path_out, 'w') as fout:
        for item in image_list:
            line = '%d\t' % item[0]
            for j in item[2:]:
                line += '%f\t' % j
            line += '%s\n' % item[1]
            fout.write(line)


def make_list(args):
    """Generate the ``.lst`` file(s) for the images under ``args.root``.

    Parameters
    ----------
    args: object that contains all the arguments
    """
    image_list = list(list_image(args.root, args.recursive, args.exts))
    if args.shuffle is True:
        # Fixed seed so that the shuffled order is reproducible.
        random.seed(100)
        random.shuffle(image_list)
    N = len(image_list)
    chunk_size = (N + args.chunks - 1) // args.chunks
    for i in range(args.chunks):
        chunk = image_list[i * chunk_size:(i + 1) * chunk_size]
        if args.chunks > 1:
            str_chunk = '_%d' % i
        else:
            str_chunk = ''
        sep = int(chunk_size * args.train_ratio)
        sep_test = int(chunk_size * args.test_ratio)
        if args.train_ratio == 1.0:
            write_list(args.prefix + str_chunk + '.lst', chunk)
        else:
            if args.test_ratio:
                write_list(args.prefix + str_chunk + '_test.lst', chunk[:sep_test])
            if args.train_ratio + args.test_ratio < 1.0:
                write_list(args.prefix + str_chunk + '_val.lst', chunk[sep_test + sep:])
            write_list(args.prefix + str_chunk + '_train.lst', chunk[sep_test:sep_test + sep])


def read_list(path_in):
    """Read a ``.lst`` file and yield its parsed entries.

    Lines with fewer than three tab-separated fields, or whose numeric fields
    cannot be parsed, are reported on stdout and skipped.

    Parameters
    ----------
    path_in: string

    Returns
    -------
    iterator of ``[index, path, label, ...]`` lists (labels as floats)
    """
    with open(path_in) as fin:
        for raw in fin:
            line = [i.strip() for i in raw.strip().split('\t')]
            line_len = len(line)
            # check the data format of .lst file
            if line_len < 3:
                print('lst should have at least has three parts, but only has %s parts for %s' % (line_len, line))
                continue
            try:
                item = [int(line[0])] + [line[-1]] + [float(i) for i in line[1:-1]]
            except Exception as e:
                print('Parsing lst met error for %s, detail: %s' % (line, e))
                continue
            yield item


def image_encode(args, i, item, q_out):
    """Read, preprocess and pack one image, then put the result on ``q_out``.

    Exactly one tuple ``(i, packed_bytes_or_None, item)`` is put on the queue;
    ``None`` marks an image that could not be read or packed.

    Parameters
    ----------
    args: object
    i: int
    item: list
    q_out: queue
    """
    fullpath = os.path.join(args.root, item[1])

    if len(item) > 3 and args.pack_label:
        header = mx.recordio.IRHeader(0, item[2:], item[0], 0)
    else:
        header = mx.recordio.IRHeader(0, item[2], item[0], 0)

    if args.pass_through:
        # Store the file bytes untouched, without decoding.
        try:
            with open(fullpath, 'rb') as fin:
                img = fin.read()
            s = mx.recordio.pack(header, img)
            q_out.put((i, s, item))
        except Exception as e:
            traceback.print_exc()
            print('pack_img error:', item[1], e)
            q_out.put((i, None, item))
        return

    try:
        img = cv2.imread(fullpath, args.color)
    except Exception:
        traceback.print_exc()
        print('imread error trying to load file: %s ' % fullpath)
        q_out.put((i, None, item))
        return
    if img is None:
        print('imread read blank (None) image for file: %s' % fullpath)
        q_out.put((i, None, item))
        return
    if args.center_crop:
        # Crop the longer side symmetrically so that the image becomes square.
        if img.shape[0] > img.shape[1]:
            margin = (img.shape[0] - img.shape[1]) // 2
            img = img[margin:margin + img.shape[1], :]
        else:
            margin = (img.shape[1] - img.shape[0]) // 2
            img = img[:, margin:margin + img.shape[0]]
    if args.resize:
        # Scale so that the shorter edge equals args.resize.
        if img.shape[0] > img.shape[1]:
            newsize = (args.resize, img.shape[0] * args.resize // img.shape[1])
        else:
            newsize = (img.shape[1] * args.resize // img.shape[0], args.resize)
        img = cv2.resize(img, newsize)

    try:
        s = mx.recordio.pack_img(header, img, quality=args.quality, img_fmt=args.encoding)
        q_out.put((i, s, item))
    except Exception as e:
        traceback.print_exc()
        print('pack_img error on file: %s' % fullpath, e)
        q_out.put((i, None, item))
        return


def read_worker(args, q_in, q_out):
    """Worker loop: take items from ``q_in``, encode them, push to ``q_out``.

    Stops when ``None`` is received on ``q_in``.

    Parameters
    ----------
    args: object
    q_in: queue
    q_out: queue
    """
    while True:
        deq = q_in.get()
        if deq is None:
            break
        i, item = deq
        image_encode(args, i, item, q_out)


def write_worker(q_out, fname, working_dir):
    """Worker loop: take encoded images from ``q_out`` and write the .rec/.idx pair.

    Results may arrive out of order; they are buffered and written strictly in
    increasing ``i``. Stops when ``None`` is received on ``q_out``.

    Parameters
    ----------
    q_out: queue
    fname: string
    working_dir: string
    """
    pre_time = time.time()
    count = 0
    fname = os.path.basename(fname)
    fname_rec = os.path.splitext(fname)[0] + '.rec'
    fname_idx = os.path.splitext(fname)[0] + '.idx'
    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
                                           os.path.join(working_dir, fname_rec), 'w')
    try:
        buf = {}
        more = True
        while more:
            deq = q_out.get()
            if deq is not None:
                i, s, item = deq
                buf[i] = (s, item)
            else:
                more = False
            while count in buf:
                s, item = buf[count]
                del buf[count]
                if s is not None:
                    record.write_idx(item[0], s)

                if count % 1000 == 0:
                    cur_time = time.time()
                    print('time:', cur_time - pre_time, ' count:', count)
                    pre_time = cur_time
                count += 1
    finally:
        # Close explicitly instead of relying on garbage collection.
        record.close()


def parse_args():
    """Parse the command line and return the argument namespace.

    ``prefix`` and ``root`` are converted to absolute paths.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Create an image list or '
                    'make a record database by reading from an image list')
    parser.add_argument('--prefix', help='prefix of input/output lst and rec files.',
                        default=r"G:\data\train_5w\train.lst")
    parser.add_argument('--root', help='path to folder containing images.',
                        default=r"G:\data\5w")

    cgroup = parser.add_argument_group('Options for creating image lists')
    cgroup.add_argument('--list', action='store_true',
                        help='If this is set im2rec will create image list(s) by traversing root folder '
                             'and output to <prefix>.lst. '
                             'Otherwise im2rec will read <prefix>.lst and create a database at <prefix>.rec')
    cgroup.add_argument('--exts', nargs='+', default=['.jpeg', '.jpg', '.png'],
                        help='list of acceptable image extensions.')
    cgroup.add_argument('--chunks', type=int, default=1, help='number of chunks.')
    cgroup.add_argument('--train-ratio', type=float, default=1.0,
                        help='Ratio of images to use for training.')
    cgroup.add_argument('--test-ratio', type=float, default=0,
                        help='Ratio of images to use for testing.')
    cgroup.add_argument('--recursive', action='store_true',
                        help='If true recursively walk through subdirs and assign an unique label '
                             'to images in each folder. Otherwise only include images in the root folder '
                             'and give them label 0.')
    cgroup.add_argument('--no-shuffle', dest='shuffle', action='store_false',
                        help='If this is passed, '
                             'im2rec will not randomize the image order in <prefix>.lst')

    rgroup = parser.add_argument_group('Options for creating database')
    rgroup.add_argument('--pass-through', action='store_true',
                        help='whether to skip transformation and save image as is')
    rgroup.add_argument('--resize', type=int, default=0,
                        help='resize the shorter edge of image to the newsize, original images will '
                             'be packed by default.')
    rgroup.add_argument('--center-crop', action='store_true',
                        help='specify whether to crop the center image to make it rectangular.')
    rgroup.add_argument('--quality', type=int, default=95,
                        help='JPEG quality for encoding, 1-100; or PNG compression for encoding, 1-9')
    rgroup.add_argument('--num-thread', type=int, default=1,
                        help='number of thread to use for encoding. order of images will be different '
                             'from the input list if >1. the input list will be modified to match the '
                             'resulting order.')
    rgroup.add_argument('--color', type=int, default=1, choices=[-1, 0, 1],
                        help='specify the color mode of the loaded image. '
                             '1: Loads a color image. Any transparency of image will be neglected. '
                             'It is the default flag. '
                             '0: Loads image in grayscale mode. '
                             '-1:Loads image as such including alpha channel.')
    rgroup.add_argument('--encoding', type=str, default='.jpg', choices=['.jpg', '.png'],
                        help='specify the encoding of the images.')
    rgroup.add_argument('--pack-label', action='store_true',
                        help='Whether to also pack multi dimensional label in the record file')
    args = parser.parse_args()
    args.prefix = os.path.abspath(args.prefix)
    args.root = os.path.abspath(args.root)
    return args


if __name__ == '__main__':
    args = parse_args()
    # if the '--list' is used, it generates .lst file
    if args.list:
        make_list(args)
    # otherwise read .lst file to generates .rec file
    else:
        if os.path.isdir(args.prefix):
            working_dir = args.prefix
        else:
            working_dir = os.path.dirname(args.prefix)
        files = [os.path.join(working_dir, fname) for fname in os.listdir(working_dir)
                 if os.path.isfile(os.path.join(working_dir, fname))]
        count = 0
        for fname in files:
            if fname.startswith(args.prefix) and fname.endswith('.lst'):
                print('Creating .rec file from', fname, 'in', working_dir)
                count += 1
                image_list = read_list(fname)
                # -- write_record -- #
                if args.num_thread > 1 and multiprocessing is not None:
                    q_in = [multiprocessing.Queue(1024) for i in range(args.num_thread)]
                    q_out = multiprocessing.Queue(1024)
                    # define the process
                    read_process = [multiprocessing.Process(target=read_worker, args=(args, q_in[i], q_out))
                                    for i in range(args.num_thread)]
                    # process images with num_thread process
                    for p in read_process:
                        p.start()
                    # only use one process to write .rec to avoid race-condition
                    write_process = multiprocessing.Process(target=write_worker, args=(q_out, fname, working_dir))
                    write_process.start()
                    # put the image list into input queue
                    for i, item in enumerate(image_list):
                        q_in[i % len(q_in)].put((i, item))
                    for q in q_in:
                        q.put(None)
                    for p in read_process:
                        p.join()

                    q_out.put(None)
                    write_process.join()
                else:
                    print('multiprocessing not available, fall back to single threaded encoding')
                    try:
                        import Queue as queue
                    except ImportError:
                        import queue
                    q_out = queue.Queue()
                    fname = os.path.basename(fname)
                    fname_rec = os.path.splitext(fname)[0] + '.rec'
                    fname_idx = os.path.splitext(fname)[0] + '.idx'
                    record = mx.recordio.MXIndexedRecordIO(os.path.join(working_dir, fname_idx),
                                                           os.path.join(working_dir, fname_rec), 'w')
                    try:
                        cnt = 0
                        pre_time = time.time()
                        for i, item in enumerate(image_list):
                            image_encode(args, i, item, q_out)
                            if q_out.empty():
                                continue
                            _, s, _ = q_out.get()
                            # image_encode reports a failed image as None;
                            # skip it, as write_worker does, instead of
                            # handing None to the record writer.
                            if s is not None:
                                record.write_idx(item[0], s)
                            if cnt % 1000 == 0:
                                cur_time = time.time()
                                print('time:', cur_time - pre_time, ' count:', cnt)
                                pre_time = cur_time
                            cnt += 1
                    finally:
                        # Close explicitly instead of relying on garbage collection.
                        record.close()
        if not count:
            print('Did not find and list file with prefix %s' % args.prefix)

mxnet制作人脸识别训练集相关推荐

  1. TF之NN:利用DNN算法(SGD+softmax+cross_entropy)对mnist手写数字图片识别训练集(TF自带函数下载)实现87.4%识别

    TF之NN:利用DNN算法(SGD+softmax+cross_entropy)对mnist手写数字图片识别训练集(TF自带函数下载)实现87.4%识别 目录 输出结果 代码设计 输出结果 代码设计 ...

  2. 人脸识别合集 | 10 ArcFace解析

    转自:https://zhuanlan.zhihu.com/p/76541084 ArcFace/InsightFace(弧度)是伦敦帝国理工学院邓建康等在2018.01发表,在SphereFace基 ...

  3. 全球最大最干净的人脸公开训练集!格灵深瞳发布Glint360K

    关注视 作者丨安翔君 来源丨格灵深瞳 编辑丨极市平台 已经开源,先贴地址~ 代码和数据地址: https://github.com/deepinsight/insightface/tree/maste ...

  4. Combined Margin loss人脸识别训练笔记

    利用caffe第三方实现的combined margin_layer进行训练(https://github.com/gehaocool/CombinedMargin-caffe),数据集采用VGGFa ...

  5. 基于FaceNet的实时人脸识别训练

    FaceNet人脸特征提取 FaceNet是一种用于提取人脸图像特征的深度神经网络.它由谷歌研究人员 Schroff 等人提出. 论文地址:https://arxiv.org/abs/1503.038 ...

  6. 使用opencv制作人脸识别小软件

    正文: 1.既然说做个小软件那就,先做个简单的软件封面. 随便找一个图片,然后手动画上自己需要的按钮,然后设置鼠标反应. 画按钮: void buttonset() {rectangle(image, ...

  7. 命名实体识别训练集汇总(一直更新)

    版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明. 本文链接:https://blog.csdn.net/leitouguan8655/artic ...

  8. VGG人脸识别训练心得

    20170929进行更新:将代码传至github上.网址:https://github.com/KaiJin1995/MTCNN-VGG-face. 该网络可以直接进行人脸检测.识别以及陌生人报警.构 ...

  9. 如何使用Python制作人脸识别系统(2022/5/28版)持续更新

    首先,你必须安装Anaconda Individual Edition.链接如下:Anaconda | Individual Edition 注意:如果你有管理员权限,请安装给ALL Users!!! ...

最新文章

  1. MyEclipse2017在线安装SVN
  2. PCL中分割_欧式分割(1)
  3. 浅谈SQL注入风险 - 一个Login拿下Server(转)
  4. 电脑公司 Ghost XP SP3 国庆特别版 v2011.10
  5. 在手机测试html,借助 IIS 管理器 -- 用手机测试HTML页面
  6. 前端学习(970):fastclick插件使用
  7. 聊聊、Highcharts 动态数据
  8. sentinel 端口_Sentinel原理:控制台是如何获取到实时数据的
  9. iphone屏蔽系统更新_手机资讯:iOS12 屏蔽系统更新的描述文件已失效怎么办
  10. slitaz c语言开发环境,makefile和cmake的简单使用
  11. 通过Keepalived实现Redis Failover自动故障切换
  12. PN序列的产生以及相关函数的计算
  13. ECShop Discuz 和帝国CMS
  14. 微信H5开发-采坑记
  15. 平衡自行车的原理以及制作方法
  16. mac电脑查看CPU温度
  17. 国外最流行的Bootstrap后台管理模板
  18. PS制作马赛克效果、炫酷光线
  19. 有道云笔记markdown最好的转PDF格式文档方法
  20. 研究人员有助的网站 呸子

热门文章

  1. shell中数字大小的比较
  2. windows 下 sublime Text3 做 Python 编辑器
  3. Linux自动备份文件(linux计划任务)
  4. 编程之美2.5 寻找最大的K个数
  5. mysql单机多实例主从_【转载】MySQL单机多实例安装并配置主从复制
  6. 绝地求生服务器延迟高低排序,绝地求生:延迟不可怕,大神教你如何高ping吃鸡!...
  7. 大工18秋c c 语言程序设计,大工19秋《可编程控制器》在线作业3【满分答案】
  8. “sudo: go:找不到命令”完美解决方案
  9. Linux初学者接住了---Linux常用命令
  10. linux中html的图片显示不出来,如何在HTML中显示原始的rgb图像