NVIDIA DALI from Getting Started to Giving Up, Part 1: Overview
NVIDIA DALI from Getting Started to Giving Up, Part 2: Introductory Example
NVIDIA DALI from Getting Started to Giving Up, Part 3: Data Loading
NVIDIA DALI from Getting Started to Giving Up, Part 4: Multiple GPU
NVIDIA DALI from Getting Started to Giving Up, Part 5: Image Processing
NVIDIA DALI from Getting Started to Giving Up, Part 6: Geometric Transforms
NVIDIA DALI from Getting Started to Giving Up, Part 7: Sequence Processing
NVIDIA DALI from Getting Started to Giving Up, Part 8: PyTorch Plugin API

1 Image Decoder

1-1 CPU
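The snippets in this part reuse a few names (image_dir, batch_size, seed, show_images) that the series defines in earlier parts. A minimal setup sketch with hypothetical values, so the examples below can be run on their own:

from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

image_dir = "data/images"   # hypothetical: directory of class subfolders containing JPEGs
batch_size = 8              # hypothetical
seed = 1234                 # hypothetical

def show_images(image_batch):
    # display a CPU TensorList as a simple image grid
    columns = 4
    rows = (batch_size + columns - 1) // columns
    fig = plt.figure(figsize=(24, (24 // columns) * rows))
    gs = gridspec.GridSpec(rows, columns)
    for i in range(batch_size):
        plt.subplot(gs[i])
        plt.axis("off")
        plt.imshow(image_batch.at(i))
    plt.show()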

class ImageDecoderCropPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(ImageDecoderCropPipeline, self).__init__(batch_size, num_threads, device_id, seed = seed)
        self.input = ops.FileReader(file_root = image_dir)
        self.pos_rng_x = ops.random.Uniform(range = (0.0, 1.0))
        self.pos_rng_y = ops.random.Uniform(range = (0.0, 1.0))
        self.decode = ops.ImageDecoderCrop(device = 'cpu', output_type = types.RGB, crop = (224, 224))

    def define_graph(self):
        jpegs, labels = self.input()
        pos_x = self.pos_rng_x()
        pos_y = self.pos_rng_y()
        images = self.decode(jpegs, crop_pos_x=pos_x, crop_pos_y=pos_y)
        return (images, labels)

pipe = ImageDecoderCropPipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
show_images( images )

1-2 GPU

import numpy as np

class ExternalInputIterator(object):
    def __init__(self, batch_size):
        self.batch_size = batch_size

    def __iter__(self):
        self.i = 0
        self.n = self.batch_size
        return self

    def __next__(self):
        pos = []
        size = []
        for _ in range(self.batch_size):
            pos.append(np.asarray([0.4, 0.2], dtype=np.float32))
            size.append(np.asarray([0.3, 0.5], dtype=np.float32))
            self.i = (self.i + 1) % self.n
        return (pos, size)

    next = __next__

eii = ExternalInputIterator(batch_size)
pos_size_iter = iter(eii)

class ImageDecoderSlicePipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(ImageDecoderSlicePipeline, self).__init__(batch_size, num_threads, device_id, seed = seed)
        self.input = ops.FileReader(file_root = image_dir)
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()
        self.input_crop = ops.ExternalSource()
        self.decode = ops.ImageDecoderSlice(device = 'mixed', output_type = types.RGB)

    def define_graph(self):
        jpegs, labels = self.input()
        self.crop_pos = self.input_crop_pos()
        self.crop_size = self.input_crop_size()
        images = self.decode(jpegs, self.crop_pos, self.crop_size)
        return (images, labels)

    def iter_setup(self):
        (crop_pos, crop_size) = pos_size_iter.next()
        self.feed_input(self.crop_pos, crop_pos)
        self.feed_input(self.crop_size, crop_size)

pipe = ImageDecoderSlicePipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
show_images( images.as_cpu() )

2 Color Space Conversion

2-1 CPU

n = 3  # number of color-space conversions

class ColorCpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(ColorCpuPipeline, self).__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = image_dir)
        self.decode = ops.ImageDecoder(device = 'cpu', output_type = types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(device = 'cpu', image_type = types.RGB, output_type = types.GRAY)

    def define_graph(self):
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images]                   # the original RGB image
        outs = [images for _ in range(n)]    # one copy per conversion
        outs[0] = self.conversions['rgb2bgr'](outs[0])
        outs[1] = self.conversions['rgb2ycbcr'](outs[1])
        outs[2] = self.conversions['rgb2gray'](outs[2])
        return outputs + outs
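The post stops at the class definition here. A minimal, hypothetical usage sketch, reusing the batch_size and show_images helper assumed in the setup above:

pipe = ColorCpuPipeline(batch_size, 1, 0)
pipe.build()
rgb, bgr, ycbcr, gray = pipe.run()
print(rgb.at(0).shape, gray.at(0).shape)   # the grayscale output has a single channel
show_images(bgr)                           # CPU outputs can be displayed directly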

2-2 GPU

class ColorGpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(ColorGpuPipeline, self).__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = image_dir)
        self.decode = ops.ImageDecoder(device = "mixed", output_type = types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(device = 'gpu', image_type = types.RGB, output_type = types.GRAY)

    def define_graph(self):
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)        # the 'mixed' decoder outputs GPU tensors
        outputs = [images.gpu()]
        outs = [images.gpu() for _ in range(n)]
        outs[0] = self.conversions['rgb2bgr'](outs[0])
        outs[1] = self.conversions['rgb2ycbcr'](outs[1])
        outs[2] = self.conversions['rgb2gray'](outs[2])
        return outputs + outs
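The GPU variant is driven the same way, except that its outputs live in device memory and need as_cpu() before display; a hypothetical usage sketch:

pipe = ColorGpuPipeline(batch_size, 1, 0)
pipe.build()
rgb, bgr, ycbcr, gray = pipe.run()
show_images(bgr.as_cpu())   # copy back to the host before plotting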

3 BrightnessContrast

3-1 CPU

class BCCpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(BCCpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(device="cpu", brightness_shift=0.3, contrast=0.4, contrast_center=100)

    def define_graph(self):
        read, _ = self.input()
        image = self.decode(read)
        converted = self.bc(image)
        return image, converted

pipecpu = BCCpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipecpu.build()
cpu_output = pipecpu.run()
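If I read the DALI documentation correctly, BrightnessContrast combines its arguments per pixel roughly as out = brightness_shift * output_range + brightness * (contrast_center + contrast * (in - contrast_center)). The NumPy sketch below is only a model of that formula (an assumption, not the actual DALI kernel), using the parameters from the CPU pipeline above:

import numpy as np

def brightness_contrast(pixels, brightness=1.0, brightness_shift=0.0,
                        contrast=1.0, contrast_center=128, out_range=255):
    # rough NumPy approximation of the operator's formula (assumption)
    out = brightness_shift * out_range + brightness * (
        contrast_center + contrast * (pixels.astype(np.float32) - contrast_center))
    return np.clip(out, 0, out_range).astype(np.uint8)

print(brightness_contrast(np.array([0, 100, 255]),
                          brightness_shift=0.3, contrast=0.4, contrast_center=100))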

3-2 GPU

class BCGpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(BCGpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(device="gpu", contrast=1.5, brightness_shift=1, brightness=-1)

    def define_graph(self):
        read, _ = self.input()
        image = self.decode(read)
        converted = self.bc(image.gpu())
        return image, converted

pipegpu = BCGpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipegpu.build()
gpu_output = pipegpu.run()
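Neither run above displays its result. Assuming the show_images helper from the setup sketch, the adjusted batches (the second output of each pipeline) can be inspected like this:

show_images(cpu_output[1])            # CPU pipeline: already host-resident
show_images(gpu_output[1].as_cpu())   # GPU pipeline: copy back to the host first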

4 HSV

4-1 CPU

class HsvCpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.hsv = ops.Hsv(device="cpu", hue=120, saturation=1, value=0.4)

    def define_graph(self):
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image)
        return image, converted
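In the Hsv operator, hue is an additive rotation in degrees while saturation and value act as multipliers, so value=0.4 darkens the image. The original post does not show the build-and-run step; a hypothetical usage sketch:

pipecpu = HsvCpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipecpu.build()
original, converted = pipecpu.run()
show_images(converted)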

4-2 GPU

class HsvGpuPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.hsv = ops.Hsv(device="gpu", hue=120, saturation=2, value=1)

    def define_graph(self):
        read, _ = self.input()
        image = self.decode(read)
        converted = self.hsv(image.gpu())
        return image, converted
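And the GPU counterpart, copying the result back to the host for display (hypothetical usage, mirroring the CPU sketch above):

pipegpu = HsvGpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipegpu.build()
original, converted = pipegpu.run()
show_images(converted.as_cpu())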

5 Resize

Scaling modes

“default” - the dimensions that are specified are scaled to the requested size; the missing extents are calculated by applying the average scale of the provided extents - for 2D with one extent specified, this means that the aspect ratio is preserved

“stretch” - the dimensions that are specified are scaled to the requested size; the missing ones are not scaled at all

“not_larger” - the image is scaled so that no dimension exceeds the specified size; aspect ratio is preserved

“not_smaller” - the image is scaled so that no dimension is smaller than specified (additionally, the max_size argument may be used to limit upscaling of inputs with very high aspect ratios); see the size arithmetic sketched below
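As a quick sanity check of the four modes, here is a sketch of the output-size arithmetic for a hypothetical 480x640 (H x W) input and a requested size of (100, 200) (height, width). This is plain arithmetic, not a DALI call, and DALI's exact rounding may differ slightly:

H, W = 480, 640              # hypothetical input size
req_h, req_w = 100, 200      # requested size=(height, width)
scale_h, scale_w = req_h / H, req_w / W                                 # ~0.208, 0.3125

default_both  = (req_h, req_w)                                          # (100, 200) - both extents given
default_x     = (round(H * scale_w), req_w)                             # (150, 200) - missing H uses the given scale
stretch_x     = (H, req_w)                                              # (480, 200) - missing H left untouched
not_larger    = tuple(round(d * min(scale_h, scale_w)) for d in (H, W)) # (100, 133)
not_smaller   = tuple(round(d * max(scale_h, scale_w)) for d in (H, W)) # (150, 200)
print(default_x, stretch_x, not_larger, not_smaller)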

import nvidia.dali as dali

batch_size = 8
pipe = dali.pipeline.Pipeline(batch_size, 3, 0)
with pipe:
    # db_folder: path to a Caffe LMDB (defined earlier in the series)
    files, labels = dali.fn.caffe_reader(path = db_folder, random_shuffle = True, seed = 1234)
    images = dali.fn.image_decoder(files, device = "mixed")
    width = 200
    height = 100
    out = [
        dali.fn.resize(images, size=[height, width]),
        dali.fn.resize(images, resize_x=width),
        dali.fn.resize(images, resize_y=height),
        dali.fn.resize(images, size=[height, width], mode="stretch"),
        dali.fn.resize(images, resize_x=width, mode="stretch"),
        dali.fn.resize(images, resize_y=height, mode="stretch"),
        dali.fn.resize(images, size=[height, width], mode="not_larger"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller", max_size=[110, 200]),
    ]
    pipe.set_outputs(*out)
pipe.build()
pipe_out = pipe.run()
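To confirm which mode produced which shape, the outputs can be copied to the host and their shapes printed; a small sketch, assuming the TensorList API with as_cpu() and at():

for i, out in enumerate(pipe_out):
    arr = out.as_cpu().at(0)    # first sample of each output as a NumPy array
    print(i, arr.shape)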

6 WarpAffine

def random_transform(index):
    dst_cx, dst_cy = (200, 200)
    src_cx, src_cy = (200, 200)

    # This function uses homogeneous coordinates - hence, 3x3 matrix

    # translate output coordinates to center defined by (dst_cx, dst_cy)
    t1 = np.array([[1, 0, -dst_cx],
                   [0, 1, -dst_cy],
                   [0, 0, 1]])

    def u():
        return np.random.uniform(-0.5, 0.5)

    # apply a randomized affine transform - uniform scaling + some random distortion
    m = np.array([[1 + u(),     u(),  0],
                  [    u(), 1 + u(),  0],
                  [      0,       0,  1]])

    # translate input coordinates to center (src_cx, src_cy)
    t2 = np.array([[1, 0, src_cx],
                   [0, 1, src_cy],
                   [0, 0, 1]])

    # combine the transforms
    m = np.matmul(t2, np.matmul(m, t1))

    # remove the last row; it's not used by affine transform
    return m[0:2, 0:3]

def gen_transforms(batch_size, single_transform_fn):
    out = np.zeros([batch_size, 2, 3])
    for i in range(batch_size):
        out[i, :, :] = single_transform_fn(i)
    return out.astype(np.float32)

np.random.seed(seed = 123)

class ExamplePipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id, pipelined = True, exec_async = True):
        super(ExamplePipeline, self).__init__(batch_size, num_threads, device_id,
                                              seed = 12, exec_pipelined = pipelined, exec_async = exec_async)
        # The reader reads raw files from some storage - in this case, a Caffe LMDB container
        self.input = ops.CaffeReader(path = db_folder, random_shuffle = True)
        # The decoder takes tensors containing raw files and outputs images
        # as 3D tensors with HWC layout
        self.decode = ops.ImageDecoder(device = "cpu", output_type = types.RGB)
        # This example uses ExternalSource to provide warp matrices
        self.transform_source = ops.ExternalSource()
        self.iter = 0
        self.warp_gpu = ops.WarpAffine(
            device = "gpu",
            size = (400, 400),                 # specify the output size
            # fill_value                       # not specifying `fill_value`
            #                                  #   results in source coordinate clamping
            interp_type = types.INTERP_LINEAR  # use linear interpolation
        )
        self.warp_cpu = ops.WarpAffine(
            device = "cpu",
            fill_value = 200,
            size = (400, 400),                 # specify the output size
            interp_type = types.INTERP_NN      # use nearest neighbor interpolation
        )
        self.warp_keep_size = ops.WarpAffine(
            device = "gpu",
            # size                             # keep original canvas size
            interp_type = types.INTERP_LINEAR  # use linear interpolation
        )

    # Then, we can tie the operators together to form a graph
    def define_graph(self):
        self.transform = self.transform_source()
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images.gpu()]
        # pass the transform parameters through GPU memory
        outputs += [self.warp_gpu(images.gpu(), self.transform.gpu())]
        # pass the transform through a named input
        outputs += [self.warp_cpu(images, matrix = self.transform).gpu()]
        outputs += [self.warp_keep_size(images.gpu(), self.transform.gpu())]
        return [self.labels, self.transform] + outputs

    # Since we're using ExternalSource, we need to feed the externally provided data to the pipeline
    def iter_setup(self):
        # Generate the transforms for the batch and feed them to the ExternalSource
        self.feed_input(self.transform, gen_transforms(self.batch_size, random_transform))

batch_size = 32
pipe = ExamplePipeline(batch_size=batch_size, num_threads=2, device_id = 0)
pipe.build()
pipe_out = pipe.run()
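For intuition about the matrix convention: as far as I understand DALI's WarpAffine, the 2x3 matrix maps output (destination) coordinates to input (source) coordinates. A quick NumPy check of where the output-image center samples from (pure NumPy, no DALI call):

m = random_transform(0)                     # a 2x3 matrix produced as above
dst_center = np.array([200.0, 200.0, 1.0])  # homogeneous (x, y, 1) in the output image
src_point = m @ dst_center                  # corresponding (x, y) in the source image
print(src_point)                            # lands near the source center (200, 200) by construction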

7 3D Transforms

import nvidia.dali.fn as fn

pipe = Pipeline(batch_size = 1, num_threads = 3, device_id = 0, exec_pipelined = True, exec_async = True)
with pipe:
    data = fn.external_source(source=GetData, device="gpu", layout="DHWC")
    resized = fn.resize(data,
                        resize_z = 224,
                        mode = "stretch",  # scale only the Z axis
                        interp_type = types.INTERP_LANCZOS3)
    angle = fn.random.uniform(range=(-20, 20), seed=123)
    axis = fn.random.uniform(range=(-1, 1), shape=[3])
    rotated = fn.rotate(resized,
                        angle = angle,
                        axis = axis,
                        interp_type = types.INTERP_LINEAR,
                        fill_value = 0)  # force out-of-bounds pixels to 0
    warped = fn.warp_affine(resized,
                            size = (200, 320, 320),
                            matrix = (1, 1, 0,   -180,
                                      0, 1, 0.2, -20,
                                      0, 0, 1,   10),
                            interp_type = types.INTERP_LINEAR,
                            fill_value = 0)  # force out-of-bounds pixels to 0
    pipe.set_outputs(data, resized, rotated, warped, axis, angle)
pipe.build()
pipe_out = pipe.run()
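GetData is not defined in this excerpt; external_source only needs a callable (or iterable) that returns one batch of DHWC arrays per call. A hypothetical placeholder that feeds random uint8 volumes (shape chosen arbitrarily) could look like this:

import numpy as np

def GetData():
    # hypothetical: a batch of one random 64x128x128x1 volume, layout DHWC
    return [np.random.randint(0, 256, size=(64, 128, 128, 1), dtype=np.uint8)]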
