5. 数据预处理

pytorch数据一般是要写一个类函数来继承Dataset类的，需要定义三个函数__init__(self), len(self), getitem(self)这三个函数，在DBFace中的代码如下所示：

class LDataset(Dataset):def __init__(self, labelfile, imagesdir, numlandmarks, mean, std, width=800, height=800):self.width = widthself.height = heightself.numlandmarks = numlandmarksself.items = common.load_webface(labelfile, imagesdir, numlandmarks)self.mean = meanself.std = stddef __len__(self):return len(self.items)def __getitem__(self, index):...

可以看到初始化还是比较简单的，主要是下载对应的参数，这边LZ其实是修改了一部分代码的，感兴趣的小伙伴可以在github上下载原始代码。

在初始化部分需要看的是这一行

self.items = common.load_webface(labelfile, imagesdir, numlandmarks)

我们来看下load_webface部分：

def load_webface(labelfile, imagesdir, numlandmarks):with open(labelfile, "r") as f:lines = f.readlines()lines = [line.replace("\n", "") for line in lines]stage = 0facials = []file = Nonefiles = []for index, line in enumerate(lines):if line.startswith("#"):if file is not None:files.append([f"{imagesdir}/{file}", parse_facials_webface(facials, numlandmarks)])file = line[2:]facials = []else:facials.append([float(item) for item in line.split(" ")])if file is not None:files.append([f"{imagesdir}/{file}", parse_facials_webface(facials, numlandmarks)])return files

widerface的label.txt如下所示：

# 0--Parade/0_Parade_marchingband_1_849.jpg
449 330 122 149 488.906 373.643 0.0 542.089 376.442 0.0 515.031 412.83 0.0 485.174 425.893 0.0 538.357 431.491 0.0 0.82
# 0--Parade/0_Parade_Parade_0_904.jpg
361 98 263 339 424.143 251.656 0.0 547.134 232.571 0.0 494.121 325.875 0.0 453.83 368.286 0.0 561.978 342.839 0.0 0.89
# 0--Parade/0_Parade_marchingband_1_799.jpg
...

“#”后面有一个空格，后面跟着图片名称，第二行分别是x，y，w，h也就是人脸框左上角的点坐标和对应框的宽度和高度，后面跟着关键点坐标，这个应该还是很好理解的，在parse_facials_webface 函数中要根据自己的关键点数量进行修改，这个看了源码应该很好理解

self.items里面就是保存的要读取图片的路径和对应人脸框和关键点的数据

 # 构建dataset部分,继承torch 的dataset类self.train_dataset = LDataset(labelfile, imagesdir, numlandmarks, mean=self.mean, std=self.std,width=self.width, height=self.height)self.train_loader = DataLoader(dataset=self.train_dataset, batch_size=self.batch_size, shuffle=True,num_workers=24)# 优化器adam,使用默认的weight_decay=0self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)self.per_epoch_batchs = len(self.train_loader)self.iter = 0self.epochs = 150

使用DataLoader类生成一个迭代对象，在训练的时候可以给网络送不同的数据，优化器是adam，学习率是之前设置好的

# warm up一下
lr_scheduer = {1: 1e-3,2: 2e-3,3: 1e-3,60: 1e-4,120: 1e-5}

实际上很多数据的预处理是在训练的阶段进行的，DBFace中APP中的两个函数就是用来训练的

    def train_epoch(self, epoch):for indbatch, (images, heatmap_gt, heatmap_posweight, reg_tlrb, reg_mask, landmark_gt, landmark_mask, num_objs,keep_mask) in enumerate(self.train_loader):self.iter += 1batch_objs = sum(num_objs)batch_size = self.batch_sizeif batch_objs == 0:batch_objs = 1heatmap_gt = heatmap_gt.to(self.gpu_master)heatmap_posweight = heatmap_posweight.to(self.gpu_master)keep_mask = keep_mask.to(self.gpu_master)reg_tlrb = reg_tlrb.to(self.gpu_master)reg_mask = reg_mask.to(self.gpu_master)landmark_gt = landmark_gt.to(self.gpu_master)landmark_mask = landmark_mask.to(self.gpu_master)images = images.to(self.gpu_master)hm, tlrb, landmark = self.model(images)# 把数据压到0-1的范围hm = hm.sigmoid()hm = torch.clamp(hm, min=1e-4, max=1 - 1e-4)# 为什么回归出来框坐标要进行exp处理？# 因为使用exp后的结果进行拟合，换句话说网络推断出来的是log（tlrb）tlrb = torch.exp(tlrb) hm_loss = self.focal_loss(hm, heatmap_gt, heatmap_posweight, keep_mask=keep_mask) / batch_objsreg_loss = self.giou_loss(tlrb, reg_tlrb, reg_mask) * 5  # 这个权重要改吗?landmark_loss = self.landmark_loss(landmark, landmark_gt, landmark_mask) * 0.1loss = hm_loss + reg_loss + landmark_lossself.optimizer.zero_grad()loss.backward()self.optimizer.step()epoch_flt = epoch + indbatch / self.per_epoch_batchsif indbatch % 10 == 0:log.info(f"iter: {self.iter}, lr: {self.lr:g}, epoch: {epoch_flt:.2f}, loss: {loss.item():.2f}, hm_loss: {hm_loss.item():.2f}, "f"box_loss: {reg_loss.item():.2f}, lmdk_loss: {landmark_loss.item():.5f}")if indbatch % 1000 == 0:log.info("save hm")hm_image = hm[0, 0].cpu().data.numpy()common.imwrite(f"{jobdir}/imgs/hm_image.jpg", hm_image * 255)common.imwrite(f"{jobdir}/imgs/hm_image_gt.jpg", heatmap_gt[0, 0].cpu().data.numpy() * 255)image = np.clip((images[0].permute(1, 2, 0).cpu().data.numpy() * self.std + self.mean) * 255, 0,255).astype(np.uint8)outobjs = eval_tool.detect_images_giou_with_netout(hm, tlrb, landmark, threshold=0.1, ibatch=0)im1 = image.copy()for obj in outobjs:common.drawbbox(im1, obj)common.imwrite(f"{jobdir}/imgs/train_result.jpg", im1)def train(self):# warm up?lr_scheduer = {1: 1e-3,2: 2e-3,3: 1e-3,60: 1e-4,120: 1e-5}# trainself.model.train()for epoch in range(self.epochs):if epoch in lr_scheduer:self.set_lr(lr_scheduer[epoch])self.train_epoch(epoch)file = f"{jobdir}/models/{epoch + 1}.pth"common.mkdirs_from_file_path(f让ile)torch.save(self.model.module.state_dict(), file)

在训练的的函数中，pytorch就会调用LDataset中的__getitem__(self, index)，这个其实才是比较关键的数据预处理部分

    def __getitem__(self, index):# 获取对应的图片的路径，objs是对应图片中的人脸框和关键点，如果有多个人脸，就会有多个listimgfile, objs = self.items[index]image = common.imread(imgfile)if image is None:log.info("{} is empty, index={}".format(imgfile, index))return self[random.randint(0, len(self.items) - 1)]keepsize = 12# 进行数据增广image, objs = augment.webface(image, objs, self.numlandmarks, self.width, self.height, keepsize=0)# norm, 固定值可以放到NNIE上去做，进行数据归一化，这个可以在生成wk的时候做，也可以用网络做，当然也可以用cpu做，用neon加速# 现在真的每个操作都得节约时间，1ms也要节约，LZ哭了image = ((image / 255.0 - self.mean) / self.std).astype(np.float32)posweight_radius = 2  # 这个有啥用?,后面高斯核的半径# 这个是通过fpn，输出的feature map stride = 4，加速可以是stride=8，满脑子加速stride = 4fm_width = self.width // stridefm_height = self.height // stride# 这里需要根据关键点的数量进行修改，初始化一些mapheatmap_gt = np.zeros((1, fm_height, fm_width), np.float32)heatmap_posweight = np.zeros((1, fm_height, fm_width), np.float32)keep_mask = np.ones((1, fm_height, fm_width), np.float32)reg_tlrb = np.zeros((1 * 4, fm_height, fm_width), np.float32)reg_mask = np.zeros((1, fm_height, fm_width), np.float32)distance_map = np.zeros((1, fm_height, fm_width), np.float32) + 1000# 我有25个关键点，有x，y坐标，要改成25×2# landmark_gt = np.zeros((1 * 10, fm_height, fm_width), np.float32)# landmark_mask = np.zeros((1, fm_height, fm_width), np.float32)landmark_gt = np.zeros((1 * 50, fm_height, fm_width), np.float32)landmark_mask = np.zeros((1, fm_height, fm_width), np.float32)hassmall = Falsefor obj in objs:isSmallObj = obj.area < keepsize * keepsizeif isSmallObj:cx, cy = obj.safe_scale_center(1 / stride, fm_width, fm_height)keep_mask[0, cy, cx] = 0w, h = obj.width / stride, obj.height / stridex0 = int(common.clip_value(cx - w // 2, fm_width - 1))y0 = int(common.clip_value(cy - h // 2, fm_height - 1))x1 = int(common.clip_value(cx + w // 2, fm_width - 1) + 1)y1 = int(common.clip_value(cy + h // 2, fm_height - 1) + 1)#这个是计算loss的时候的一个参数，也就是说只有有人脸的区域才参与loss的计算，如果不是人脸区域，不参与loss的计算if x1 - x0 > 0 and y1 - y0 > 0:keep_mask[0, y0:y1, x0:x1] = 0hassmall = Truefor obj in objs:classes = 0cx, cy = obj.safe_scale_center(1 / stride, fm_width, fm_height)reg_box = np.array(obj.box) / stride #框的坐标除以对应的strideisSmallObj = obj.area < keepsize * keepsizeif isSmallObj:if obj.area >= 5 * 5:distance_map[classes, cy, cx] = 0reg_tlrb[classes * 4:(classes + 1) * 4, cy, cx] = reg_box  # 通道数代表你回归的框的坐标乘以类别reg_mask[classes, cy, cx] = 1continuew, h = obj.width / stride, obj.height / stridex0 = int(common.clip_value(cx - w // 2, fm_width - 1))y0 = int(common.clip_value(cy - h // 2, fm_height - 1))x1 = int(common.clip_value(cx + w // 2, fm_width - 1) + 1)y1 = int(common.clip_value(cy + h // 2, fm_height - 1) + 1)if x1 - x0 > 0 and y1 - y0 > 0:keep_mask[0, y0:y1, x0:x1] = 1# 参考cornernetw_radius, h_radius = common.truncate_radius((obj.width, obj.height))  # size/(4*stride)gaussian_map = common.draw_truncate_gaussian(heatmap_gt[classes, :, :], (cx, cy), h_radius, w_radius)mxface = 300miface = 25mxline = max(obj.width, obj.height)gamma = (mxline - miface) / (mxface - miface) * 10gamma = min(max(0, gamma), 10) + 1common.draw_gaussian(heatmap_posweight[classes, :, :], (cx, cy), posweight_radius, k=gamma)range_expand_x = math.ceil(w_radius)range_expand_y = math.ceil(h_radius)min_expand_size = 3range_expand_x = max(min_expand_size, range_expand_x)range_expand_y = max(min_expand_size, range_expand_y)icx, icy = cx, cyreg_landmark = Nonefill_threshold = 0.3# 这里也需要根据关键点数量进行修改if obj.haslandmark:reg_landmark = np.array(obj.x5y5_cat_landmark) / stride# x5y5 = [cx] * 5 + [cy] * 5x5y5 = [cx] * 25 + [cy] * 25rvalue = (reg_landmark - x5y5)# landmark_gt[0:10, cy, cx] = np.array(common.log(rvalue)) / 4# 注意这里的loglandmark_gt[0:50, cy, cx] = np.array(common.log(rvalue)) / 4landmark_mask[0, cy, cx] = 1if not obj.rotate:for cx in range(icx - range_expand_x, icx + range_expand_x + 1):for cy in range(icy - range_expand_y, icy + range_expand_y + 1):if cx < fm_width and cy < fm_height and cx >= 0 and cy >= 0:my_gaussian_value = 0.9gy, gx = cy - icy + range_expand_y, cx - icx + range_expand_xif gy >= 0 and gy < gaussian_map.shape[0] and gx >= 0 and gx < gaussian_map.shape[1]:my_gaussian_value = gaussian_map[gy, gx]distance = math.sqrt((cx - icx) ** 2 + (cy - icy) ** 2)if my_gaussian_value > fill_threshold or distance <= min_expand_size:already_distance = distance_map[classes, cy, cx]my_mix_distance = (1 - my_gaussian_value) * distanceif my_mix_distance > already_distance:continuedistance_map[classes, cy, cx] = my_mix_distancereg_tlrb[classes * 4:(classes + 1) * 4, cy, cx] = reg_boxreg_mask[classes, cy, cx] = 1# if hassmall:#     common.imwrite("test_result/keep_mask.jpg", keep_mask[0]*255)#     common.imwrite("test_result/heatmap_gt.jpg", heatmap_gt[0]*255)#     common.imwrite("test_result/keep_ori.jpg", (image*self.std+self.mean)*255)return T.to_tensor(image), heatmap_gt, heatmap_posweight, reg_tlrb, reg_mask, landmark_gt, landmark_mask, len(objs), keep_mask

6. 数据增广

数据增广函数在argument.py脚本中

def webface(image, objs, numlandmarks, outw=800, outh=800, keepsize=8):funcs = [[augmentWithColorJittering, 0.7], [augmentWithFlip, 0.7]]random.shuffle(funcs)num = len(funcs)for n in range(num):func, freq = funcs[n]if randrf(0, 1) < freq:image, objs = func(image, objs)if randrf(0, 1) > 0.5:image, objs = cubeTransform(image, objs, outw, outh, keepsize=keepsize)image, objs = augmentWithCropScaleWebface(image, objs, numlandmarks, outw, outh, 'cube', keepsize=keepsize)else:image, objs = augmentWithCropScaleWebface(image, objs, numlandmarks, outw, outh, keepsize=keepsize)return image, objs

主要是：

augmentWithColorJittering：对颜色的数据增强，包括图像的亮度，对比度和饱和度
augmentWithFlip：水平翻转，这个当中需要注意的是关键点要根据水平翻转后也要进行镜像处理
augmentWithCropScaleWebface：随机裁剪和尺度变换
cubeTransform：立方体转换

后面一篇准备写下网络推断部分，未完待续。。。

DBFace: 源码阅读(二)相关推荐

mybatis源码阅读(二)：mybatis初始化上
转载自 mybatis源码阅读(二):mybatis初始化上 1.初始化入口 //Mybatis 通过SqlSessionFactory获取SqlSession, 然后才能通过SqlSession与 ...
DBFace: 源码阅读(一)
DBFACE: 源码阅读 1. 背景 DBFace框架是可以同时获得人脸检测和关键点定位,相较与人脸检测和关键点定位分开的做法有一定的优势,减少了对原图的crop和resize操作,并且对多人脸的情况 ...
LeGo-LOAM激光雷达定位算法源码阅读(二)
文章目录 1.featureAssociation框架 1.1节点代码主体 1.2 FeatureAssociation构造函数 1.3 runFeatureAssociation()主体函数 2.重 ...
nginx源码阅读(二).初始化:main函数及ngx_init_cycle函数
前言在分析源码时,我们可以先把握主干,然后其他部分再挨个分析就行了.接下来我们先看看nginx的main函数干了些什么. main函数这里先介绍一些下面会遇到的变量类型: ngx_int_t: t ...
【SwinTransformer源码阅读二】Window Attention和Shifted Window Attention部分
先放一下SwinTransformer的整体结构,图片源于原论文,可以发现,在Transformer的Block中 W-MSA(Window based multi-head self attenti ...
datax源码阅读二：Engine流程
一.根据前面python文件知道,java的main函数是com.alibaba.datax.core.Engine public static void main(String[] args) th ...
Struts2源码阅读(二)_ActionContext及CleanUP Filter
1. ActionContext ActionContext是被存放在当前线程中的,获取ActionContext也是从ThreadLocal中获取的.所以在执行拦截器. action和result的 ...
Mybatis源码阅读(二)
本文主要介绍Java中,不使用XML和使用XML构建SqlSessionFactory,通过SqlSessionFactory 中获取SqlSession的方法,使用SqlsessionManager ...
jedis 源码阅读二——jedisPool
文章目录 JedisPool JedisFactory.java GenericObjectPool.java 一个UnitTest加深理解: 我们从这段代码分析jedisPool: JedisPoo ...

DBFace: 源码阅读(二)

5. 数据预处理

6. 数据增广

DBFace: 源码阅读(二)相关推荐

最新文章

热门文章