计算机视觉：数据预处理-图像增广方法

数据预处理
- 随机改变亮暗、对比度和颜色等
- 随机填充
- 随机裁剪
- 随机缩放
- 随机翻转
- 随机打乱真实框排列顺序
- 图像增广方法汇总
批量数据读取与加速

数据预处理

在计算机视觉中，通常会对图像做一些随机的变化，产生相似但又不完全相同的样本。主要作用是扩大训练数据集，抑制过拟合，提升模型的泛化能力，常用的方法见下面的程序。

随机改变亮暗、对比度和颜色等

import numpy as np
import cv2
from PIL import Image, ImageEnhance
import random# 随机改变亮暗、对比度和颜色等
def random_distort(img):# 随机改变亮度def random_brightness(img, lower=0.5, upper=1.5):e = np.random.uniform(lower, upper)return ImageEnhance.Brightness(img).enhance(e)# 随机改变对比度def random_contrast(img, lower=0.5, upper=1.5):e = np.random.uniform(lower, upper)return ImageEnhance.Contrast(img).enhance(e)# 随机改变颜色def random_color(img, lower=0.5, upper=1.5):e = np.random.uniform(lower, upper)return ImageEnhance.Color(img).enhance(e)ops = [random_brightness, random_contrast, random_color]np.random.shuffle(ops)img = Image.fromarray(img)img = ops[0](img)img = ops[1](img)img = ops[2](img)img = np.asarray(img)return img

随机填充

# 随机填充
def random_expand(img,gtboxes,max_ratio=4.,fill=None,keep_ratio=True,thresh=0.5):if random.random() > thresh:return img, gtboxesif max_ratio < 1.0:return img, gtboxesh, w, c = img.shaperatio_x = random.uniform(1, max_ratio)if keep_ratio:ratio_y = ratio_xelse:ratio_y = random.uniform(1, max_ratio)oh = int(h * ratio_y)ow = int(w * ratio_x)off_x = random.randint(0, ow - w)off_y = random.randint(0, oh - h)out_img = np.zeros((oh, ow, c))if fill and len(fill) == c:for i in range(c):out_img[:, :, i] = fill[i] * 255.0out_img[off_y:off_y + h, off_x:off_x + w, :] = imggtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow)gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh)gtboxes[:, 2] = gtboxes[:, 2] / ratio_xgtboxes[:, 3] = gtboxes[:, 3] / ratio_yreturn out_img.astype('uint8'), gtboxes

随机裁剪

随机裁剪之前需要先定义两个函数，multi_box_iou_xywh和box_crop这两个函数将被保存在box_utils.py文件中。

import numpy as npdef multi_box_iou_xywh(box1, box2):"""In this case, box1 or box2 can contain multi boxes.Only two cases can be processed in this method:1, box1 and box2 have the same shape, box1.shape == box2.shape2, either box1 or box2 contains only one box, len(box1) == 1 or len(box2) == 1If the shape of box1 and box2 does not match, and both of them contain multi boxes, it will be wrong."""assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4."assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4."b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2inter_x1 = np.maximum(b1_x1, b2_x1)inter_x2 = np.minimum(b1_x2, b2_x2)inter_y1 = np.maximum(b1_y1, b2_y1)inter_y2 = np.minimum(b1_y2, b2_y2)inter_w = inter_x2 - inter_x1inter_h = inter_y2 - inter_y1inter_w = np.clip(inter_w, a_min=0., a_max=None)inter_h = np.clip(inter_h, a_min=0., a_max=None)inter_area = inter_w * inter_hb1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)return inter_area / (b1_area + b2_area - inter_area)def box_crop(boxes, labels, crop, img_shape):x, y, w, h = map(float, crop)im_w, im_h = map(float, img_shape)boxes = boxes.copy()boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, (boxes[:, 0] + boxes[:, 2] / 2) * im_wboxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, (boxes[:, 1] + boxes[:, 3] / 2) * im_hcrop_box = np.array([x, y, x + w, y + h])centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all(axis=1)boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2])boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:])boxes[:, :2] -= crop_box[:2]boxes[:, 2:] -= crop_box[:2]mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1))boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)labels = labels * mask.astype('float32')boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (boxes[:, 2] - boxes[:, 0]) / wboxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (boxes[:, 3] - boxes[:, 1]) / hreturn boxes, labels, mask.sum()

# 随机裁剪
def random_crop(img,boxes,labels,scales=[0.3, 1.0],max_ratio=2.0,constraints=None,max_trial=50):if len(boxes) == 0:return img, boxesif not constraints:constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0),(0.9, 1.0), (0.0, 1.0)]img = Image.fromarray(img)w, h = img.sizecrops = [(0, 0, w, h)]for min_iou, max_iou in constraints:for _ in range(max_trial):scale = random.uniform(scales[0], scales[1])aspect_ratio = random.uniform(max(1 / max_ratio, scale * scale), \min(max_ratio, 1 / scale / scale))crop_h = int(h * scale / np.sqrt(aspect_ratio))crop_w = int(w * scale * np.sqrt(aspect_ratio))crop_x = random.randrange(w - crop_w)crop_y = random.randrange(h - crop_h)crop_box = np.array([[(crop_x + crop_w / 2.0) / w,(crop_y + crop_h / 2.0) / h,crop_w / float(w), crop_h / float(h)]])iou = multi_box_iou_xywh(crop_box, boxes)if min_iou <= iou.min() and max_iou >= iou.max():crops.append((crop_x, crop_y, crop_w, crop_h))breakwhile crops:crop = crops.pop(np.random.randint(0, len(crops)))crop_boxes, crop_labels, box_num = box_crop(boxes, labels, crop, (w, h))if box_num < 1:continueimg = img.crop((crop[0], crop[1], crop[0] + crop[2],crop[1] + crop[3])).resize(img.size, Image.LANCZOS)img = np.asarray(img)return img, crop_boxes, crop_labelsimg = np.asarray(img)return img, boxes, labels

随机缩放

# 随机缩放
def random_interp(img, size, interp=None):interp_method = [cv2.INTER_NEAREST,cv2.INTER_LINEAR,cv2.INTER_AREA,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4,]if not interp or interp not in interp_method:interp = interp_method[random.randint(0, len(interp_method) - 1)]h, w, _ = img.shapeim_scale_x = size / float(w)im_scale_y = size / float(h)img = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp)return img

随机翻转

# 随机翻转
def random_flip(img, gtboxes, thresh=0.5):if random.random() > thresh:img = img[:, ::-1, :]gtboxes[:, 0] = 1.0 - gtboxes[:, 0]return img, gtboxes

随机打乱真实框排列顺序

# 随机打乱真实框排列顺序
def shuffle_gtbox(gtbox, gtlabel):gt = np.concatenate([gtbox, gtlabel[:, np.newaxis]], axis=1)idx = np.arange(gt.shape[0])np.random.shuffle(idx)gt = gt[idx, :]return gt[:, :4], gt[:, 4]

图像增广方法汇总

# 图像增广方法汇总
def image_augment(img, gtboxes, gtlabels, size, means=None):# 随机改变亮暗、对比度和颜色等img = random_distort(img)# 随机填充img, gtboxes = random_expand(img, gtboxes, fill=means)# 随机裁剪img, gtboxes, gtlabels, = random_crop(img, gtboxes, gtlabels)# 随机缩放img = random_interp(img, size)# 随机翻转img, gtboxes = random_flip(img, gtboxes)# 随机打乱真实框排列顺序gtboxes, gtlabels = shuffle_gtbox(gtboxes, gtlabels)return img.astype('float32'), gtboxes.astype('float32'), gtlabels.astype('int32')

这里得到的img数据数值需要调整，需要除以255，并且减去均值和方差，再将维度从[H, W, C]调整为[C, H, W]。

img, gt_boxes, gt_labels, scales = get_img_data_from_file(record)
size = 512
img, gt_boxes, gt_labels = image_augment(img, gt_boxes, gt_labels, size)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
mean = np.array(mean).reshape((1, 1, -1))
std = np.array(std).reshape((1, 1, -1))
img = (img / 255.0 - mean) / std
img = img.astype('float32').transpose((2, 0, 1))
img

将上面的过程整理成一个get_img_data函数。

def get_img_data(record, size=640):img, gt_boxes, gt_labels, scales = get_img_data_from_file(record)img, gt_boxes, gt_labels = image_augment(img, gt_boxes, gt_labels, size)mean = [0.485, 0.456, 0.406]std = [0.229, 0.224, 0.225]mean = np.array(mean).reshape((1, 1, -1))std = np.array(std).reshape((1, 1, -1))img = (img / 255.0 - mean) / stdimg = img.astype('float32').transpose((2, 0, 1))return img, gt_boxes, gt_labels, scales

TRAINDIR = '/home/aistudio/work/insects/train'
TESTDIR = '/home/aistudio/work/insects/test'
VALIDDIR = '/home/aistudio/work/insects/val'
cname2cid = get_insect_names()
records = get_annotations(cname2cid, TRAINDIR)record = records[0]
img, gt_boxes, gt_labels, scales = get_img_data(record, size=480)

img.shape

(3, 480, 480)

gt_boxes.shape

(50, 4)

gt_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,0, 4, 0, 0, 0, 0], dtype=int32)

scales

(1244.0, 1244.0)

批量数据读取与加速

下面的程序展示了如何读取一张图片的数据并加速，下面的代码实现了批量数据读取。

# 获取一个批次内样本随机缩放的尺寸
def get_img_size(mode):if (mode == 'train') or (mode == 'valid'):inds = np.array([0,1,2,3,4,5,6,7,8,9])ii = np.random.choice(inds)img_size = 320 + ii * 32else:img_size = 608return img_size# 将 list形式的batch数据 转化成多个array构成的tuple
def make_array(batch_data):img_array = np.array([item[0] for item in batch_data], dtype = 'float32')gt_box_array = np.array([item[1] for item in batch_data], dtype = 'float32')gt_labels_array = np.array([item[2] for item in batch_data], dtype = 'int32')img_scale = np.array([item[3] for item in batch_data], dtype='int32')return img_array, gt_box_array, gt_labels_array, img_scale# 批量读取数据，同一批次内图像的尺寸大小必须是一样的，
# 不同批次之间的大小是随机的，
# 由上面定义的get_img_size函数产生
def data_loader(datadir, batch_size= 10, mode='train'):cname2cid = get_insect_names()records = get_annotations(cname2cid, datadir)def reader():if mode == 'train':np.random.shuffle(records)batch_data = []img_size = get_img_size(mode)for record in records:#print(record)img, gt_bbox, gt_labels, im_shape = get_img_data(record, size=img_size)batch_data.append((img, gt_bbox, gt_labels, im_shape))if len(batch_data) == batch_size:yield make_array(batch_data)batch_data = []img_size = get_img_size(mode)if len(batch_data) > 0:yield make_array(batch_data)return reader

d = data_loader('/home/aistudio/work/insects/train', batch_size=2, mode='train')

img, gt_boxes, gt_labels, im_shape = next(d())

img.shape, gt_boxes.shape, gt_labels.shape, im_shape.shape

((2, 3, 352, 352), (2, 50, 4), (2, 50), (2, 2))

由于数据预处理耗时较长，可能会成为网络训练速度的瓶颈，所以需要对预处理部分进行优化。通过使用飞桨提供的API paddle.reader.xmap_readers可以开启多线程读取数据，具体实现代码如下。

import functools
import paddle# 使用paddle.reader.xmap_readers实现多线程读取数据
def multithread_loader(datadir, batch_size= 10, mode='train'):cname2cid = get_insect_names()records = get_annotations(cname2cid, datadir)def reader():if mode == 'train':np.random.shuffle(records)img_size = get_img_size(mode)batch_data = []for record in records:batch_data.append((record, img_size))if len(batch_data) == batch_size:yield batch_databatch_data = []img_size = get_img_size(mode)if len(batch_data) > 0:yield batch_datadef get_data(samples):batch_data = []for sample in samples:record = sample[0]img_size = sample[1]img, gt_bbox, gt_labels, im_shape = get_img_data(record, size=img_size)batch_data.append((img, gt_bbox, gt_labels, im_shape))return make_array(batch_data)mapper = functools.partial(get_data, )return paddle.reader.xmap_readers(mapper, reader, 8, 10)

d = multithread_loader('/home/aistudio/work/insects/train', batch_size=2, mode='train')

img, gt_boxes, gt_labels, im_shape = next(d())

img.shape, gt_boxes.shape, gt_labels.shape, im_shape.shape

((2, 3, 320, 320), (2, 50, 4), (2, 50), (2, 2))

至此，我们完成了如何查看数据集中的数据、提取数据标注信息、从文件读取图像和标注数据、图像增广、批量读取和加速等过程，通过multithread_loader可以返回img, gt_boxes, gt_labels, im_shape等数据，接下来就可以将它们输入到神经网络，应用到具体算法上了。

在开始具体的算法讲解之前，先补充一下读取测试数据的代码。测试数据没有标注信息，也不需要做图像增广，代码如下所示。

# 测试数据读取# 将 list形式的batch数据 转化成多个array构成的tuple
def make_test_array(batch_data):img_name_array = np.array([item[0] for item in batch_data])img_data_array = np.array([item[1] for item in batch_data], dtype = 'float32')img_scale_array = np.array([item[2] for item in batch_data], dtype='int32')return img_name_array, img_data_array, img_scale_array# 测试数据读取
def test_data_loader(datadir, batch_size= 10, test_image_size=608, mode='test'):"""加载测试用的图片，测试数据没有groundtruth标签"""image_names = os.listdir(datadir)def reader():batch_data = []img_size = test_image_sizefor image_name in image_names:file_path = os.path.join(datadir, image_name)img = cv2.imread(file_path)img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)H = img.shape[0]W = img.shape[1]img = cv2.resize(img, (img_size, img_size))mean = [0.485, 0.456, 0.406]std = [0.229, 0.224, 0.225]mean = np.array(mean).reshape((1, 1, -1))std = np.array(std).reshape((1, 1, -1))out_img = (img / 255.0 - mean) / stdout_img = out_img.astype('float32').transpose((2, 0, 1))img = out_img #np.transpose(out_img, (2,0,1))im_shape = [H, W]batch_data.append((image_name.split('.')[0], img, im_shape))if len(batch_data) == batch_size:yield make_test_array(batch_data)batch_data = []if len(batch_data) > 0:yield make_test_array(batch_data)return reader

计算机视觉：数据预处理-图像增广方法相关推荐

目标检测：数据预处理——图像增广
文章目录一.数据增多(图像增广) 主要作用: 数据预处理 **随机改变亮暗.对比度和颜色等** **随机填充** **随机裁剪** **随机缩放** **随机翻转** **随机打乱真实框排列顺序** ...
计算机视觉之图像增广(翻转、随机裁剪、颜色变化[亮度、对比度、饱和度、色调])
随着深度学习的进步和硬件的更新迭代,计算机视觉技术也得到了更大的提升,在计算机视觉领域,经常要训练深度学习的模型,而训练模型的最终目的是为了更好的应用到实际当中去,那就要解决一个精度问题和泛化能力,对 ...
PaddleClas-图像分类中的8种数据增广方法(cutmix, autoaugment,..)
本文主要来源于PaddleClas这个代码仓库中的数据增广文档:https://github.com/PaddlePaddle/PaddleClas/blob/master/docs/zh_CN/ad ...
【工大SCIR笔记】自然语言处理领域的数据增广方法
点击上方,选择星标或置顶,每天给你送干货! 作者:李博涵来自:哈工大SCIR 1.摘要本文介绍自然语言处理领域的数据增广方法.数据增广(Data Augmentation,也有人将Data Aug ...
自然语言处理领域的数据增广方法
1.摘要本文介绍自然语言处理领域的数据增广方法.数据增广(Data Augmentation,也有人将Data Augmentation翻译为"数据增强",然而"数据增 ...
深度学习-计算机视觉--图像增广
图像增广大规模数据集是成功应用深度神经网络的前提.图像增广(image augmentation)技术通过对训练图像做一系列随机改变,来产生相似但又不同的训练样本,从而扩大训练数据集的规模. 图像增 ...
《动手学深度学习》(六) -- 图像增广与微调
1 图像增广图像增广在对训练图像进行一系列的随机变化之后,生成相似但不同的训练样本,从而扩大了训练集的规模.此外,应用图像增广的原因是,随机改变训练样本可以减少模型对某些属性的依赖,从而提高模型的泛 ...
图像增广：强化深度学习的视觉表现力
目录摘要: 1. 图像增广简介 2. 图像增广的原理 3. 常见的图像增广技术 4. 如何在实际项目中应用图像增广 5.实际应用摘要: 当今,深度学习已经在计算机视觉领域取得了令人瞩目的成就.图像 ...
[pytorch、学习] - 9.1 图像增广
参考 9.1 图像增广在5.6节(深度卷积神经网络)里我们提过,大规模数据集是成功应用神经网络的前提.图像增广(image augmentation)技术通过对训练图像做一系列随机改变,来产生相似但 ...

计算机视觉：数据预处理-图像增广方法

计算机视觉：数据预处理-图像增广方法

数据预处理

随机改变亮暗、对比度和颜色等

随机填充

随机裁剪

随机缩放

随机翻转

随机打乱真实框排列顺序

图像增广方法汇总

批量数据读取与加速

计算机视觉：数据预处理-图像增广方法相关推荐

最新文章

热门文章