NVIDIA DALI从入门到放弃之五:Image Processing
NVIDIA DALI从入门到放弃之一:概述
NVIDIA DALI从入门到放弃之二:入门示例
NVIDIA DALI从入门到放弃之三:Data Loading
NVIDIA DALI从入门到放弃之四:Multiple GPU
NVIDIA DALI从入门到放弃之五:Image Processing
NVIDIA DALI从入门到放弃之六:Geometric Transforms
NVIDIA DALI从入门到放弃之七:Sequence Processing
NVIDIA DALI从入门到放弃之八:PyTorch Plugin API
1 Image Decoder
1-1 CPU
class ImageDecoderCropPipeline(Pipeline):
    """Pipeline that reads files and decodes them with a CPU decode-and-crop operator.

    The crop window is fixed at 224x224; its anchor is supplied by two
    uniform random generators over the range (0.0, 1.0).
    """

    def __init__(self, batch_size, num_threads, device_id):
        # `seed` and `image_dir` are module-level names defined outside this block.
        super().__init__(batch_size, num_threads, device_id, seed=seed)
        self.input = ops.FileReader(file_root=image_dir)
        self.pos_rng_x = ops.random.Uniform(range=(0.0, 1.0))
        self.pos_rng_y = ops.random.Uniform(range=(0.0, 1.0))
        self.decode = ops.ImageDecoderCrop(
            device='cpu',
            output_type=types.RGB,
            crop=(224, 224),
        )

    def define_graph(self):
        """Connect reader -> decode/crop and return ``(images, labels)``."""
        encoded, labels = self.input()
        anchor_x = self.pos_rng_x()
        anchor_y = self.pos_rng_y()
        cropped = self.decode(encoded, crop_pos_x=anchor_x, crop_pos_y=anchor_y)
        return (cropped, labels)


# Build the pipeline with a single worker thread on device 0, run one batch
# and display it.
pipe = ImageDecoderCropPipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
show_images(images)
1-2 GPU
import numpy as np


class ExternalInputIterator(object):
    """Endless iterator yielding per-sample crop anchors and crop sizes.

    Every call to ``next`` returns a pair ``(pos, size)``; each is a list of
    ``batch_size`` float32 arrays holding the constant values ``[0.4, 0.2]``
    and ``[0.3, 0.5]`` respectively.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size
        # Initialise the cursor here (not only in __iter__) so that calling
        # next() on a freshly constructed instance does not raise
        # AttributeError.
        self.i = 0
        self.n = batch_size

    def __iter__(self):
        """Reset the cursor and return ``self``."""
        self.i = 0
        self.n = self.batch_size
        return self

    def __next__(self):
        """Return ``(pos, size)`` lists with one array per sample."""
        pos = []
        size = []
        for _ in range(self.batch_size):
            pos.append(np.asarray([0.4, 0.2], dtype=np.float32))
            size.append(np.asarray([0.3, 0.5], dtype=np.float32))
            # NOTE(review): the collapsed source does not show whether this
            # update sits inside or after the loop; kept per-sample here.
            # The cursor does not influence the returned data either way.
            self.i = (self.i + 1) % self.n
        return (pos, size)

    # Python 2 style alias, kept for callers that invoke ``.next()``.
    next = __next__


# `batch_size` is a module-level name defined outside this block.
eii = ExternalInputIterator(batch_size)
pos_size_iter = iter(eii)


class ImageDecoderSlicePipeline(Pipeline):
    """Pipeline that decodes and slices images with the 'mixed' decoder.

    The slice anchor and slice size are fed from Python on every iteration
    through two ``ExternalSource`` operators.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # `seed` and `image_dir` are module-level names defined outside this block.
        super(ImageDecoderSlicePipeline, self).__init__(
            batch_size, num_threads, device_id, seed=seed
        )
        self.input = ops.FileReader(file_root=image_dir)
        self.input_crop_pos = ops.ExternalSource()
        self.input_crop_size = ops.ExternalSource()
        # Not referenced by define_graph/iter_setup; kept so the attribute
        # set of the class is unchanged.
        self.input_crop = ops.ExternalSource()
        self.decode = ops.ImageDecoderSlice(device='mixed', output_type=types.RGB)

    def define_graph(self):
        """Connect reader and external inputs to the decoder; return ``(images, labels)``."""
        jpegs, labels = self.input()
        # Stored on self because iter_setup needs these handles for feed_input.
        self.crop_pos = self.input_crop_pos()
        self.crop_size = self.input_crop_size()
        images = self.decode(jpegs, self.crop_pos, self.crop_size)
        return (images, labels)

    def iter_setup(self):
        """Pull the next ``(pos, size)`` batch and feed it to the external sources."""
        # Use the builtin next() instead of the Python 2 ``.next()`` method so
        # any object implementing the iterator protocol works here.
        crop_pos, crop_size = next(pos_size_iter)
        self.feed_input(self.crop_pos, crop_pos)
        self.feed_input(self.crop_size, crop_size)


# Build with a single worker thread on device 0, run one batch and display it
# after copying the output to the CPU.
pipe = ImageDecoderSlicePipeline(batch_size, 1, 0)
pipe.build()
images, _ = pipe.run()
show_images(images.as_cpu())
2 Color Space Conversion
2-1 CPU
class ColorCpuPipeline(Pipeline):
    """Pipeline that decodes images on the CPU and converts them to other color spaces.

    Three ``ColorSpaceConversion`` operators are configured: RGB->BGR,
    RGB->YCbCr and RGB->GRAY.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # Fixed: the original called super(SimplePipeline, self), naming a
        # class that is not this one, so construction failed.
        super(ColorCpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        # `image_dir` is a module-level name defined outside this block.
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device='cpu', output_type=types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(
            device='cpu', image_type=types.RGB, output_type=types.GRAY)

    def define_graph(self):
        """Return the decoded images followed by ``n`` outputs.

        The first three of those outputs are the BGR, YCbCr and GRAY
        conversions; any beyond the third are the decoded images unchanged.
        """
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images]
        # `n` is not defined in this block; it must be at least 3 because
        # indices 0..2 are assigned below — TODO confirm where it is set.
        outs = [images for _ in range(n)]
        outs[0] = self.conversions['rgb2bgr'](outs[0])
        outs[1] = self.conversions['rgb2ycbcr'](outs[1])
        outs[2] = self.conversions['rgb2gray'](outs[2])
        return outputs + outs
2-2 GPU
class ColorGpuPipeline(Pipeline):
    """Pipeline that decodes with the 'mixed' decoder and converts color spaces on the GPU.

    Three ``ColorSpaceConversion`` operators are configured: RGB->BGR,
    RGB->YCbCr and RGB->GRAY.
    """

    def __init__(self, batch_size, num_threads, device_id):
        # Fixed: the original called super(SimplePipeline, self), naming a
        # class that is not this one, so construction failed.
        super(ColorGpuPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        # `image_dir` is a module-level name defined outside this block.
        self.input = ops.FileReader(file_root=image_dir)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.conversions = {}
        self.conversions['rgb2bgr'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.BGR)
        self.conversions['rgb2ycbcr'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.YCbCr)
        self.conversions['rgb2gray'] = ops.ColorSpaceConversion(
            device='gpu', image_type=types.RGB, output_type=types.GRAY)

    def define_graph(self):
        """Return the decoded images followed by ``n`` outputs.

        The first three of those outputs are the BGR, YCbCr and GRAY
        conversions; any beyond the third are the decoded images unchanged.
        """
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images.gpu()]
        # `n` is not defined in this block; it must be at least 3 because
        # indices 0..2 are assigned below — TODO confirm where it is set.
        outs = [images.gpu() for _ in range(n)]
        outs[0] = self.conversions['rgb2bgr'](outs[0])
        outs[1] = self.conversions['rgb2ycbcr'](outs[1])
        outs[2] = self.conversions['rgb2gray'](outs[2])
        return outputs + outs
3 BrightnessContrast
3-1 CPU
class BCCpuPipeline(Pipeline):
    """CPU pipeline returning each decoded image and its brightness/contrast-adjusted copy."""

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        # `image_filename` is a module-level name defined outside this block.
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="cpu",
            brightness_shift=0.3,
            contrast=0.4,
            contrast_center=100,
        )

    def define_graph(self):
        """Return ``(decoded, adjusted)``; the reader's label output is discarded."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        adjusted = self.bc(decoded)
        return decoded, adjusted


# Build with a single worker thread on device 0 and run one batch.
pipecpu = BCCpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipecpu.build()
cpu_output = pipecpu.run()
3-2 GPU
class BCGpuPipeline(Pipeline):
    """Pipeline using the 'mixed' decoder and a GPU brightness/contrast operator."""

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        # `image_filename` is a module-level name defined outside this block.
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.bc = ops.BrightnessContrast(
            device="gpu",
            contrast=1.5,
            brightness_shift=1,
            brightness=-1,
        )

    def define_graph(self):
        """Return ``(decoded, adjusted)``; the reader's label output is discarded."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        adjusted = self.bc(decoded.gpu())
        return decoded, adjusted


# Build with a single worker thread on device 0 and run one batch.
pipegpu = BCGpuPipeline(batch_size=batch_size, num_threads=1, device_id=0)
pipegpu.build()
gpu_output = pipegpu.run()
4 HSV
4-1 CPU
class HsvCpuPipeline(Pipeline):
    """CPU pipeline returning each decoded image and its HSV-adjusted copy.

    The ``Hsv`` operator is configured with hue=120, saturation=1, value=0.4.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        # `image_filename` is a module-level name defined outside this block.
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        self.hsv = ops.Hsv(device="cpu", hue=120, saturation=1, value=0.4)

    def define_graph(self):
        """Return ``(decoded, adjusted)``; the reader's label output is discarded."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        adjusted = self.hsv(decoded)
        return decoded, adjusted
4-2 GPU
class HsvGpuPipeline(Pipeline):
    """Pipeline using the 'mixed' decoder and a GPU ``Hsv`` operator.

    The ``Hsv`` operator is configured with hue=120, saturation=2, value=1.
    """

    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id, seed=42)
        # `image_filename` is a module-level name defined outside this block.
        self.input = ops.FileReader(device="cpu", file_root=image_filename)
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        self.hsv = ops.Hsv(device="gpu", hue=120, saturation=2, value=1)

    def define_graph(self):
        """Return ``(decoded, adjusted)``; the reader's label output is discarded."""
        encoded, _ = self.input()
        decoded = self.decode(encoded)
        adjusted = self.hsv(decoded.gpu())
        return decoded, adjusted
5 Resize
Scaling modes
“default” - the dimensions which are specified, are scaled to the requested size; the missing extents are calculated by applying average scale of the provided extents - for 2D and one extent specified, this means that aspect ratio is preserved
“stretch” - the dimensions which are specified, are scaled to the requested size; the missing ones are not scaled at all
“not_larger” - the image is scaled so that no dimension exceeds the specified size; aspect ratio is preserved
“not_smaller” - the image is scaled so that no dimension is smaller than specified (additionally, max_size argument may be used to limit upscaling of inputs with very high aspect ratios)
# Functional-API pipeline that resizes the same decoded batch in nine
# different ways: three per scaling mode shown ("default", "stretch") plus
# "not_larger", "not_smaller" and "not_smaller" with a max_size limit.
batch_size = 8
pipe = dali.pipeline.Pipeline(batch_size, 3, 0)

with pipe:
    # `db_folder` is a module-level name defined outside this block.
    files, labels = dali.fn.caffe_reader(
        path=db_folder,
        random_shuffle=True,
        seed=1234,
    )
    images = dali.fn.image_decoder(files, device="mixed")

    width = 200
    height = 100

    out = [
        # Default mode: full size, width only, height only.
        dali.fn.resize(images, size=[height, width]),
        dali.fn.resize(images, resize_x=width),
        dali.fn.resize(images, resize_y=height),
        # "stretch" mode with the same three size specifications.
        dali.fn.resize(images, size=[height, width], mode="stretch"),
        dali.fn.resize(images, resize_x=width, mode="stretch"),
        dali.fn.resize(images, resize_y=height, mode="stretch"),
        # Bounding modes.
        dali.fn.resize(images, size=[height, width], mode="not_larger"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller"),
        dali.fn.resize(images, size=[height, width], mode="not_smaller", max_size=[110, 200]),
    ]
    pipe.set_outputs(*out)

pipe.build()
pipe_out = pipe.run()
6 WarpAffine
def random_transform(index):
    """Return a randomized 2x3 affine matrix centred on (200, 200).

    ``index`` is accepted for the ``single_transform_fn`` call convention
    but is not used.
    """
    dst_cx, dst_cy = (200, 200)
    src_cx, src_cy = (200, 200)

    def u():
        return np.random.uniform(-0.5, 0.5)

    # Homogeneous coordinates are used, hence the 3x3 matrices.
    # Move output coordinates so that (dst_cx, dst_cy) becomes the origin.
    t1 = np.array([
        [1, 0, -dst_cx],
        [0, 1, -dst_cy],
        [0, 0, 1],
    ])

    # Randomized affine part: uniform scaling plus some random distortion.
    m = np.array([
        [1 + u(), u(), 0],
        [u(), 1 + u(), 0],
        [0, 0, 1],
    ])

    # Move input coordinates back so the origin lands on (src_cx, src_cy).
    t2 = np.array([
        [1, 0, src_cx],
        [0, 1, src_cy],
        [0, 0, 1],
    ])

    # Compose the three transforms.
    m = t2 @ (m @ t1)

    # The last row is not used by an affine transform; drop it.
    return m[:2, :3]


def gen_transforms(batch_size, single_transform_fn):
    """Return a float32 array of shape (batch_size, 2, 3).

    Row ``k`` is ``single_transform_fn(k)``.
    """
    batch = np.zeros([batch_size, 2, 3])
    for sample_idx in range(batch_size):
        batch[sample_idx] = single_transform_fn(sample_idx)
    return batch.astype(np.float32)


np.random.seed(seed=123)


class ExamplePipeline(Pipeline):
    """Pipeline applying externally supplied affine matrices with three WarpAffine variants."""

    def __init__(self, batch_size, num_threads, device_id, pipelined=True, exec_async=True):
        super().__init__(
            batch_size,
            num_threads,
            device_id,
            seed=12,
            exec_pipelined=pipelined,
            exec_async=exec_async,
        )
        # Reader for raw files stored in a Caffe LMDB container.
        # `db_folder` is a module-level name defined outside this block.
        self.input = ops.CaffeReader(path=db_folder, random_shuffle=True)
        # Decoder: raw file tensors in, images out as 3D tensors with HWC layout.
        self.decode = ops.ImageDecoder(device="cpu", output_type=types.RGB)
        # The warp matrices arrive through an ExternalSource.
        self.transform_source = ops.ExternalSource()
        self.iter = 0
        # GPU warp with explicit output size and linear interpolation.
        # `fill_value` is deliberately left unset, which results in source
        # coordinate clamping.
        self.warp_gpu = ops.WarpAffine(
            device="gpu",
            size=(400, 400),
            interp_type=types.INTERP_LINEAR,
        )
        # CPU warp with explicit output size, a fill value and
        # nearest-neighbor interpolation.
        self.warp_cpu = ops.WarpAffine(
            device="cpu",
            fill_value=200,
            size=(400, 400),
            interp_type=types.INTERP_NN,
        )
        # GPU warp without `size`, keeping the original canvas size;
        # linear interpolation.
        self.warp_keep_size = ops.WarpAffine(
            device="gpu",
            interp_type=types.INTERP_LINEAR,
        )

    def define_graph(self):
        """Tie the operators together and return labels, transforms and image outputs."""
        self.transform = self.transform_source()
        self.jpegs, self.labels = self.input()
        images = self.decode(self.jpegs)
        outputs = [images.gpu()]
        # Transform parameters passed through GPU memory.
        outputs += [self.warp_gpu(images.gpu(), self.transform.gpu())]
        # Transform passed through the named `matrix` input.
        outputs += [self.warp_cpu(images, matrix=self.transform).gpu()]
        outputs += [self.warp_keep_size(images.gpu(), self.transform.gpu())]
        return [self.labels, self.transform] + outputs

    def iter_setup(self):
        """Generate this batch's transforms and feed them to the ExternalSource."""
        matrices = gen_transforms(self.batch_size, random_transform)
        self.feed_input(self.transform, matrices)


batch_size = 32
pipe = ExamplePipeline(batch_size=batch_size, num_threads=2, device_id=0)
pipe.build()
pipe_out = pipe.run()
7 3D Transforms
# Functional-API pipeline applying resize, rotate and warp_affine to
# volumetric input with "DHWC" layout.
pipe = Pipeline(
    batch_size=1,
    num_threads=3,
    device_id=0,
    exec_pipelined=True,
    exec_async=True,
)

with pipe:
    # `GetData` is a callable defined outside this block.
    data = fn.external_source(source=GetData, device="gpu", layout="DHWC")

    # "stretch" mode with only resize_z given: scale only the Z axis.
    resized = fn.resize(
        data,
        resize_z=224,
        mode="stretch",
        interp_type=types.INTERP_LANCZOS3,
    )

    angle = fn.random.uniform(range=(-20, 20), seed=123)
    axis = fn.random.uniform(range=(-1, 1), shape=[3])

    # fill_value=0 forces out-of-bounds pixels to 0.
    rotated = fn.rotate(
        resized,
        angle=angle,
        axis=axis,
        interp_type=types.INTERP_LINEAR,
        fill_value=0,
    )

    # Twelve matrix coefficients, written here as three rows of four.
    # fill_value=0 forces out-of-bounds pixels to 0.
    warped = fn.warp_affine(
        resized,
        size=(200, 320, 320),
        matrix=(
            1, 1, 0, -180,
            0, 1, 0.2, -20,
            0, 0, 1, 10,
        ),
        interp_type=types.INTERP_LINEAR,
        fill_value=0,
    )

    pipe.set_outputs(data, resized, rotated, warped, axis, angle)

pipe.build()
pipe_out = pipe.run()
NVIDIA DALI从入门到放弃之五:Image Processing相关推荐
- NVIDIA DALI从入门到放弃之四:Multiple GPU
NVIDIA DALI从入门到放弃之一:概述 NVIDIA DALI从入门到放弃之二:入门示例 NVIDIA DALI从入门到放弃之三:Data Loading NVIDIA DALI从入门到放弃之四 ...
- 【傻瓜攻略】深度学习之从入门到放弃
从研究生进来之后,一直到现在已经进行了一年关于DP的学习,写篇文章总结一下我蹒跚的学习过程.总结来说是一个从入门学习到几乎想要放弃的过程.顺带列举下面几个坑,希望能帮助一下同样在这条路上行走的旅人们. ...
- Havok物理引擎不完全指南--从入门到放弃
Havok物理引擎不完全指南–从入门到放弃 Havok概述 Havok,全称为Havok Game Dynamics SDK,译作Havok游戏动力开发包. 人们经常说到的,与PhysX相提并论的Ha ...
- AI从入门到放弃2:CNN的导火索,用MLP做图像分类识别?
来源 | 腾讯知乎专栏 作者 | AIoys(腾讯员工,后台工程师) 项目文档和代码在此:github项目地址: https://github.com/zsysuper/AI_Notes ▌一.前言 ...
- AOP埋点从入门到放弃(二)
其实人最大悲哀莫过于知道自己想要什么,却不知道怎么坚持!最近迷恋上了死侍 其实和我平时的状态差不多,以一个混子的心态去做任何事情,往往成功的概率会更大!!! 一张图片镇楼!!! 上文说到了Aspect ...
- 入门到放弃node系列之网络模块(二)
为什么80%的码农都做不了架构师?>>> 前言 本文首发[一名打字员] 上一节我们刚刚介绍完node的HTTP和HTTPS模块,相信我们也对nodejs有了更深层次的理解,接下 ...
- webpack - vue Component 从入门到放弃(三)
离上一篇已经一个星期了,人的拖延症是没法救的,今晚趁着蒙蒙春雨,来抒发抒发情感. 上一篇简单介绍了webpack的配置,这里稍微再做一一下延伸 插件 插件可以完成更多 loader 不能完成的功能.插 ...
- CYQ.Data 从入门到放弃ORM系列:开篇:自动化框架编程思维
前言: 随着CYQ.Data 开始回归免费使用之后,发现用户的情绪越来越激动,为了保持这持续的激动性,让我有了开源的念头. 同时,由于框架经过这5-6年来的不断演进,以前发的早期教程已经太落后了,包括 ...
- python ** 运算符_Python从入门到放弃运算符(2)
摘要:上一篇Python从入门到放弃-运算符(1),讲了Python的运算符中的算术运算符.赋值运算符.比较(关系)运算符,这篇继续讲Python的运算符. 逻辑运算符 逻辑运算符是对真和假两种布尔值 ...
最新文章
- 罗田用好“大数据”力促扶贫更精准
- 绝了!“修仙模式”学编程是什么体验?
- 命令行 蓝牙_Ubuntu使用BlueZ驱动蓝牙dongle
- c 打印 callback 函数名_Go 中的函数
- mysql sql时间比较_mysql和sql时间 字段比较大小的问题
- java8种基本类型长度_【Java基础】Java的8种基本数据类型深入介绍
- Web服务器常见HTTP错误码
- np.stack()函数详解
- 爬取北京市公交线路信息
- php容器概念,PHP容器——Pimple运行流程浅析
- java 雷霆战机 教程,java swing实现简单的雷霆战机小游戏项目源码附带视频指导修改教程...
- excel提取奇数行
- 调用iframe子页面中的函数
- 我国老年相亲市场的现状分析:百亿级市场的难点与机遇
- JAVA-XMLJSON
- 图像灰度值 灰度值与像素值的关系
- ssm+jsp计算机毕业设计宠物网站laf7a(程序+LW+源码+远程部署)
- 鸣志驱动器与研华工控机RS485/422 com串口接线方法
- android多渠道打包插件,Android几种多渠道打包的步骤详解
- mysql 的innoDB和NDB数据库引擎
热门文章
- Linux通配符的使用详解
- Linux内核及内核编程之五Linux下的C编程特点
- 旧版本Microsoft Office正在配置解决方法
- 【windows版】TensorRT安装教程
- 教育培训系统,软件行业的“常青藤”
- express+request实现-图夫在线爬取网页图片
- 手机控制电脑,在WIFI局域网下(关机,重启,遥控)
- 2012-11-09
- 高数__已知一个平面方程_求平行的平面,并且经过某点
- 关于Tex的一般用法汇总(各种操作链接自己使用 一直更新)