NVIDIA DALI从入门到放弃之五：Image Processing

NVIDIA DALI从入门到放弃之一：概述
NVIDIA DALI从入门到放弃之二：入门示例
NVIDIA DALI从入门到放弃之三：Data Loading
NVIDIA DALI从入门到放弃之四：Multiple GPU
NVIDIA DALI从入门到放弃之五：Image Processing
NVIDIA DALI从入门到放弃之六：Geometric Transforms
NVIDIA DALI从入门到放弃之七：Sequence Processing
NVIDIA DALI从入门到放弃之八：PyTorch Plugin API

1 Image Decoder

1-1 CPU

class ImageDecoderCropPipeline(Pipeline):
    """Read encoded files and decode them with a fused decode-and-crop operator.

    The crop size is fixed at (224, 224). The crop anchor is supplied at
    graph-definition time by two uniform random generators configured with
    range (0.0, 1.0), one per axis.

    Relies on the module-level names ``seed`` and ``image_dir``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=seed)
        self.input = ops.FileReader(file_root=image_dir)
        # Independent generators for the horizontal and vertical crop anchor.
        self.pos_rng_x = ops.random.Uniform(range=(0.0, 1.0))
        self.pos_rng_y = ops.random.Uniform(range=(0.0, 1.0))
        self.decode = ops.ImageDecoderCrop(
            device='cpu',
            output_type=types.RGB,
            crop=(224, 224),
        )

    def define_graph(self):
        """Return ``(images, labels)`` with the images decoded and cropped."""
        encoded, labels = self.input()
        anchor_x = self.pos_rng_x()
        anchor_y = self.pos_rng_y()
        cropped = self.decode(encoded, crop_pos_x=anchor_x, crop_pos_y=anchor_y)
        return (cropped, labels)


# Build the pipeline with one worker thread on device 0 and show one batch.
pipe = ImageDecoderCropPipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
show_images(images)

1-2 GPU

import numpy as np


class ExternalInputIterator(object):
    """Endless iterator yielding one batch of constant crop windows per step.

    Every step returns ``(pos, size)``: two lists of ``batch_size`` float32
    arrays, each ``pos`` entry being ``[0.4, 0.2]`` and each ``size`` entry
    being ``[0.3, 0.5]``. The iterator never raises ``StopIteration``.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size

    def __iter__(self):
        # Reset the bookkeeping counters; they are only initialised here, so
        # iter() must be called before the first __next__().
        self.i = 0
        self.n = self.batch_size
        return self

    def __next__(self):
        anchors = []
        extents = []
        for _sample in range(self.batch_size):
            anchors.append(np.asarray([0.4, 0.2], dtype=np.float32))
            extents.append(np.asarray([0.3, 0.5], dtype=np.float32))
            # Advance the wrap-around sample counter once per generated sample.
            self.i = (self.i + 1) % self.n
        return (anchors, extents)

    # Python 2 style alias so callers may also use ``iterator.next()``.
    next = __next__


eii = ExternalInputIterator(batch_size)
pos_size_iter = iter(eii)


class ImageDecoderSlicePipeline(Pipeline):
    """Read encoded files and decode only a slice of each image.

    The slice anchor and slice shape are not computed inside the graph; they
    are fed on every iteration from the module-level ``pos_size_iter`` through
    two ``ExternalSource`` operators.

    Relies on the module-level names ``seed``, ``image_dir`` and
    ``pos_size_iter``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=seed)
        self.input = ops.FileReader(file_root=image_dir)
        # One external source per decoder argument (anchor, shape).
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()
        self.decode = ops.ImageDecoderSlice(device='mixed', output_type=types.RGB)

    def define_graph(self):
        """Return ``(images, labels)`` with the images decoded and sliced."""
        jpegs, labels = self.input()
        # Keep the external-source outputs on self: iter_setup() needs them
        # as the targets of feed_input().
        self.crop_pos = self.input_crop_pos()
        self.crop_size = self.input_crop_size()
        images = self.decode(jpegs, self.crop_pos, self.crop_size)
        return (images, labels)

    def iter_setup(self):
        """Feed the next batch of slice anchors and shapes into the graph."""
        # Builtin next() works with any iterator, with or without a
        # Python 2 style ``.next`` alias.
        crop_pos, crop_size = next(pos_size_iter)
        self.feed_input(self.crop_pos, crop_pos)
        self.feed_input(self.crop_size, crop_size)


# Build the pipeline with one worker thread on device 0 and show one batch.
pipe = ImageDecoderSlicePipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
# The decoder is configured with device='mixed'; copy the batch to the CPU
# before handing it to show_images().
show_images(images.as_cpu())

2 Color Space Conversion

2-1 CPU

class ColorCpuPipeline(Pipeline):
    """Decode images as RGB on the CPU and convert them to three color spaces.

    ``define_graph`` returns four outputs in this order: the decoded RGB
    images, then their BGR, YCbCr and grayscale conversions.

    Relies on the module-level name ``image_dir``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=12)
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)
        # One ColorSpaceConversion operator per target color space, keyed by
        # a "<source>2<target>" name.
        self.conversions = {
            'rgb2bgr': ops.ColorSpaceConversion(
                device='cpu', image_type=types.RGB, output_type=types.BGR),
            'rgb2ycbcr': ops.ColorSpaceConversion(
                device='cpu', image_type=types.RGB, output_type=types.YCbCr),
            'rgb2gray': ops.ColorSpaceConversion(
                device='cpu', image_type=types.RGB, output_type=types.GRAY),
        }

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` for the current batch."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        # The conversions are listed explicitly so the graph does not depend
        # on a module-level counter; the order fixes the output order.
        converted = [
            self.conversions[name](images)
            for name in ('rgb2bgr', 'rgb2ycbcr', 'rgb2gray')
        ]
        return [images] + converted

2-2 GPU

class ColorGpuPipeline(Pipeline):
    """Decode images as RGB and convert them to three color spaces on the GPU.

    The decoder is configured with ``device="mixed"`` and every conversion
    operator with ``device='gpu'``. ``define_graph`` returns four outputs in
    this order: the decoded RGB images, then their BGR, YCbCr and grayscale
    conversions.

    Relies on the module-level name ``image_dir``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=12)
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        # One ColorSpaceConversion operator per target color space, keyed by
        # a "<source>2<target>" name.
        self.conversions = {
            'rgb2bgr': ops.ColorSpaceConversion(
                device='gpu', image_type=types.RGB, output_type=types.BGR),
            'rgb2ycbcr': ops.ColorSpaceConversion(
                device='gpu', image_type=types.RGB, output_type=types.YCbCr),
            'rgb2gray': ops.ColorSpaceConversion(
                device='gpu', image_type=types.RGB, output_type=types.GRAY),
        }

    def define_graph(self):
        """Return ``[rgb, bgr, ycbcr, gray]`` for the current batch."""
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        # The conversions are listed explicitly so the graph does not depend
        # on a module-level counter; the order fixes the output order.
        converted = [
            self.conversions[name](images.gpu())
            for name in ('rgb2bgr', 'rgb2ycbcr', 'rgb2gray')
        ]
        return [images.gpu()] + converted

3 BrightnessContrast

3-1 CPU

class BCCpuPipeline(Pipeline):
    """Decode images on the CPU and apply a brightness/contrast adjustment.

    The adjustment operator is configured with ``brightness_shift=0.3``,
    ``contrast=0.4`` and ``contrast_center=100``. ``define_graph`` returns the
    decoded image together with its adjusted version.

    Relies on the module-level name ``image_filename``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="cpu",
            brightness_shift=0.3,
            contrast=0.4,
            contrast_center=100,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` for the current batch."""
        encoded, _labels = self.input()
        decoded = self.decode(encoded)
        adjusted = self.bc(decoded)
        return decoded, adjusted


# Build the CPU variant with one worker thread on device 0 and run one batch.
pipecpu = BCCpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipecpu.build()
cpu_output = pipecpu.run()

3-2 GPU

class BCGpuPipeline(Pipeline):
    """Decode images and apply a brightness/contrast adjustment on the GPU.

    The decoder is configured with ``device="mixed"`` and the adjustment
    operator with ``device="gpu"``, ``contrast=1.5``, ``brightness_shift=1``
    and ``brightness=-1``. ``define_graph`` returns the decoded image together
    with its adjusted version.

    Relies on the module-level name ``image_filename``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="gpu",
            contrast=1.5,
            brightness_shift=1,
            brightness=-1,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` for the current batch."""
        encoded, _labels = self.input()
        decoded = self.decode(encoded)
        adjusted = self.bc(decoded.gpu())
        return decoded, adjusted


# Build the GPU variant with one worker thread on device 0 and run one batch.
pipegpu = BCGpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipegpu.build()
gpu_output = pipegpu.run()

4 HSV

4-1 CPU

class HsvCpuPipeline(Pipeline):
    """Decode images on the CPU and apply an HSV adjustment.

    The ``Hsv`` operator is configured with ``hue=120``, ``saturation=1`` and
    ``value=0.4``. ``define_graph`` returns the decoded image together with
    its adjusted version.

    Relies on the module-level name ``image_filename``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.hsv = ops.Hsv(
            device="cpu",
            hue=120,
            saturation=1,
            value=0.4,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` for the current batch."""
        encoded, _labels = self.input()
        decoded = self.decode(encoded)
        adjusted = self.hsv(decoded)
        return decoded, adjusted

4-2 GPU

class HsvGpuPipeline(Pipeline):
    """Decode images and apply an HSV adjustment on the GPU.

    The decoder is configured with ``device="mixed"`` and the ``Hsv`` operator
    with ``device="gpu"``, ``hue=120``, ``saturation=2`` and ``value=1``.
    ``define_graph`` returns the decoded image together with its adjusted
    version.

    Relies on the module-level name ``image_filename``.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.hsv = ops.Hsv(
            device="gpu",
            hue=120,
            saturation=2,
            value=1,
        )

    def define_graph(self):
        """Return ``(original, adjusted)`` for the current batch."""
        encoded, _labels = self.input()
        decoded = self.decode(encoded)
        adjusted = self.hsv(decoded.gpu())
        return decoded, adjusted

5 Resize

Scaling modes

“default” - the dimensions which are specified, are scaled to the requested size; the missing extents are calculated by applying average scale of the provided extents - for 2D and one extent specified, this means that aspect ratio is preserved

“stretch” - the dimensions which are specified, are scaled to the requested size; the missing ones are not scaled at all

“not_larger” - the image is scaled so that no dimension exceeds the specified size; aspect ratio is preserved

“not_smaller” - the image is scaled so that no dimension is smaller than specified (additionally, max_size argument may be used to limit upscaling of inputs with very high aspect ratios)

# Functional-API pipeline producing nine differently resized copies of each
# decoded image, one per combination of requested extents and scaling mode.
batch_size = 8
pipe = dali.pipeline.Pipeline(batch_size, 3, 0)
with pipe:
    files, labels = dali.fn.caffe_reader(
        path=db_folder, random_shuffle=True, seed=1234)
    images = dali.fn.image_decoder(files, device="mixed")

    width = 200
    height = 100

    # Keyword arguments for each resize variant, in output order.
    variants = [
        # Mode left unspecified.
        dict(size=[height, width]),
        dict(resize_x=width),
        dict(resize_y=height),
        # "stretch"
        dict(size=[height, width], mode="stretch"),
        dict(resize_x=width, mode="stretch"),
        dict(resize_y=height, mode="stretch"),
        # "not_larger" / "not_smaller"
        dict(size=[height, width], mode="not_larger"),
        dict(size=[height, width], mode="not_smaller"),
        dict(size=[height, width], mode="not_smaller", max_size=[110, 200]),
    ]
    out = [dali.fn.resize(images, **kwargs) for kwargs in variants]

    pipe.set_outputs(*out)

pipe.build()
pipe_out = pipe.run()

6 WarpAffine

def random_transform(index):
    """Return a random 2x3 affine matrix built around the point (200, 200).

    The matrix is composed in homogeneous coordinates (3x3) from a translation
    by (-200, -200), a randomly perturbed linear part, and a translation by
    (200, 200); the last row is then dropped. ``index`` is accepted so the
    function fits the ``single_transform_fn(i)`` call in ``gen_transforms``;
    it is not used.
    """
    dst_cx, dst_cy = (200, 200)
    src_cx, src_cy = (200, 200)

    def _jitter():
        return np.random.uniform(-0.5, 0.5)

    # Translate output coordinates to the center defined by (dst_cx, dst_cy).
    to_origin = np.array([
        [1, 0, -dst_cx],
        [0, 1, -dst_cy],
        [0, 0, 1],
    ])

    # Randomized affine part: identity plus a random perturbation of every
    # entry of the 2x2 linear block (drawn in row-major order).
    distortion = np.array([
        [1 + _jitter(), _jitter(), 0],
        [_jitter(), 1 + _jitter(), 0],
        [0, 0, 1],
    ])

    # Translate input coordinates to the center (src_cx, src_cy).
    from_origin = np.array([
        [1, 0, src_cx],
        [0, 1, src_cy],
        [0, 0, 1],
    ])

    # Combine the transforms, applying `to_origin` first.
    combined = np.matmul(from_origin, np.matmul(distortion, to_origin))

    # Drop the last row; it is not used by an affine transform.
    return combined[0:2, 0:3]


def gen_transforms(batch_size, single_transform_fn):
    """Return a float32 array of shape (batch_size, 2, 3) of transforms.

    Entry ``i`` is ``single_transform_fn(i)``.
    """
    batch = np.zeros([batch_size, 2, 3])
    for sample_idx in range(batch_size):
        batch[sample_idx, :, :] = single_transform_fn(sample_idx)
    return batch.astype(np.float32)


np.random.seed(seed=123)


class ExamplePipeline(Pipeline):
    """Warp decoded images with externally supplied affine matrices.

    Three ``WarpAffine`` operators are exercised: a GPU one with a fixed
    400x400 output and linear interpolation, a CPU one with a fixed 400x400
    output, nearest-neighbor interpolation and ``fill_value=200``, and a GPU
    one that keeps the original canvas size.

    Relies on the module-level name ``db_folder``.
    """

    def __init__(self, batch_size, num_threads, device_id,
                 pipelined=True, exec_async=True):
        super().__init__(
            batch_size, num_threads, device_id,
            seed=12, exec_pipelined=pipelined, exec_async=exec_async)
        # The reader reads raw files from some storage - in this case, a
        # Caffe LMDB container.
        self.input = ops.CaffeReader(path=db_folder, random_shuffle=True)
        # The decoder takes tensors containing raw files and outputs images
        # as 3D tensors with HWC layout.
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)

        # This example uses ExternalSource to provide warp matrices.
        self.transform_source = ops.ExternalSource()
        self.iter = 0

        self.warp_gpu = ops.WarpAffine(
            device="gpu",
            size=(400, 400),                   # specify the output size
            # fill_value                       # not specifying `fill_value`
            #                                  #   results in source coordinate clamping
            interp_type=types.INTERP_LINEAR,   # use linear interpolation
        )

        self.warp_cpu = ops.WarpAffine(
            device="cpu",
            fill_value=200,
            size=(400, 400),                   # specify the output size
            interp_type=types.INTERP_NN,       # use nearest neighbor interpolation
        )

        self.warp_keep_size = ops.WarpAffine(
            device="gpu",
            # size                             # keep original canvas size
            interp_type=types.INTERP_LINEAR,   # use linear interpolation
        )

    # Then, we can tie the operators together to form a graph.
    def define_graph(self):
        """Return labels, the transform, the source images and three warps."""
        self.transform = self.transform_source()
        self.jpegs, self.labels = self.input()
        decoded = self.decode(self.jpegs)

        results = [decoded.gpu()]
        # Pass the transform parameters through GPU memory.
        results.append(self.warp_gpu(decoded.gpu(), self.transform.gpu()))
        # Pass the transform through a named input.
        results.append(self.warp_cpu(decoded, matrix=self.transform).gpu())
        results.append(self.warp_keep_size(decoded.gpu(), self.transform.gpu()))

        return [self.labels, self.transform] + results

    # Since we're using ExternalSource, we need to feed the externally
    # provided data to the pipeline.
    def iter_setup(self):
        """Generate this batch's transforms and feed them to ExternalSource."""
        matrices = gen_transforms(self.batch_size, random_transform)
        self.feed_input(self.transform, matrices)


batch_size = 32
pipe = ExamplePipeline(batch_size=batch_size, num_threads=2, device_id=0)
pipe.build()
pipe_out = pipe.run()

7 3D Transforms

# Functional-API pipeline applying a resize, a rotation and an affine warp to
# volumetric data supplied by the external callable `GetData`.
pipe = Pipeline(
    batch_size=1,
    num_threads=3,
    device_id=0,
    exec_pipelined=True,
    exec_async=True,
)
with pipe:
    data = fn.external_source(source=GetData, device="gpu", layout="DHWC")

    resized = fn.resize(
        data,
        resize_z=224,
        mode="stretch",  # scale only Z axis
        interp_type=types.INTERP_LANCZOS3,
    )

    # Random rotation angle and rotation axis.
    angle = fn.random.uniform(range=(-20, 20), seed=123)
    axis = fn.random.uniform(range=(-1, 1), shape=[3])

    rotated = fn.rotate(
        resized,
        angle=angle,
        axis=axis,
        interp_type=types.INTERP_LINEAR,
        fill_value=0,  # force out-of-bounds pixels to 0
    )

    warped = fn.warp_affine(
        resized,
        size=(200, 320, 320),
        # Twelve coefficients, written here as three rows of four.
        matrix=(
            1, 1, 0, -180,
            0, 1, 0.2, -20,
            0, 0, 1, 10,
        ),
        interp_type=types.INTERP_LINEAR,
        fill_value=0,  # force out-of-bounds pixels to 0
    )

    pipe.set_outputs(data, resized, rotated, warped, axis, angle)

pipe.build()
pipe_out = pipe.run()