AVOD-代码理解系列（三）

AVOD-代码理解

　1 从输入到数据融合之前

    def build(self):
        """Build the first stage of the graph: inputs, path drop and ROI crops.

        Excerpt of the method. It sets up the input placeholders and the
        feature extractors, optionally applies path drop to the BEV and image
        bottleneck feature maps, and crops both feature maps to a common ROI
        size with ``tf.image.crop_and_resize``. The stages that consume
        ``bev_proposal_rois``, ``img_proposal_rois`` and
        ``fusion_mean_div_factor`` are not part of this excerpt.
        """
        # Setup input placeholders
        self._set_up_input_pls()

        # Setup feature extractors (they produce the input feature maps)
        self._set_up_feature_extractors()

        # Proposal inputs: the BEV and image bottleneck feature maps
        # (outputs of 1x1 convolutions, per the original annotation).
        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck

        # Divisor used when the two branches are averaged; 2.0 is the value
        # when path drop is disabled and both branches contribute.
        fusion_mean_div_factor = 2.0

        # If both img and bev probabilites are set to 1.0, don't do
        # path drop.
        # NOTE(review): the original annotation says the probabilities are
        # 0.9 for training and 1.0 for test/val, so this branch would only be
        # taken while training - confirm against the config.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):
                # Three uniform random values in [0, 1), one per coin flip.
                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                # Each mask is either 0.0 or 1.0.
                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1],
                    random_values)

                # A mask of 1 passes the branch through unchanged; a mask of
                # 0 zeroes the branch out.
                img_proposal_input = tf.multiply(img_proposal_input,
                                                 img_mask)
                bev_proposal_input = tf.multiply(bev_proposal_input,
                                                 bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor with the number of branches
                # that were actually kept.
                fusion_mean_div_factor = img_mask + bev_mask

        # Crop the BEV and image feature maps to the anchor regions.
        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):
                def get_box_indices(boxes):
                    # Returns, flattened to 1-D, the batch index of every
                    # box: a [batch, num_boxes] grid whose row i is filled
                    # with the value i.
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        # Static shape is incomplete; fall back to the
                        # dynamic shape tensor.
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                # Add a leading batch dimension of size 1 to the anchors.
                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV.
            # crop_and_resize yields
            # [num_boxes, crop_height, crop_width, depth]. Resizing both
            # inputs to the same crop size (6x6 per the original annotation)
            # gives them a common shape so they can be fused afterwards.
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input,
                self._bev_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)

            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input,
                self._img_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)

self.create_path_drop_masks：这个函数块的主要功能就是确定bev_mask和img_mask。其实我也不太明白这样做的意义；从代码看，它在训练时按给定的概率随机丢弃image或bev分支的输入（mask为0即丢弃该分支），并且保证两个分支不会同时被丢弃。

 def create_path_drop_masks(self, p_img, p_bev, random_values):
     """Determines global path drop decision based on given probabilities.

     Args:
         p_img: A tensor of float32, probability of keeping image branch
         p_bev: A tensor of float32, probability of keeping bev branch
         random_values: A tensor of float32 of shape [3], the results
             of coin flips, values should range from 0.0 - 1.0.

     Returns:
         final_img_mask: A constant tensor mask containing either one or zero
             depending on the final coin flip probability.
         final_bev_mask: A constant tensor mask containing either one or zero
             depending on the final coin flip probability.
     """
     # keep -> 1.0, kill -> 0.0
     def keep_branch(): return tf.constant(1.0)

     def kill_branch(): return tf.constant(0.0)

     # The logic works as follows:
     # We have flipped 3 coins, first determines the chance of keeping
     # the image branch, second determines keeping bev branch, the third
     # makes the final decision in the case where both branches were killed
     # off, otherwise the initial img and bev chances are kept.

     # Keep the image branch when random_values[0] < p_img, else kill it.
     img_chances = tf.case([(tf.less(random_values[0], p_img),
                             keep_branch)], default=kill_branch)

     # Keep the bev branch when random_values[1] < p_bev, else kill it.
     bev_chances = tf.case([(tf.less(random_values[1], p_bev),
                             keep_branch)], default=kill_branch)

     # Decision to determine whether both branches were killed off.
     # logical_or (not logical_and) is intended: the result is 1.0 when at
     # least one branch survived, and 0.0 only when both were killed.
     third_flip = tf.logical_or(tf.cast(img_chances, dtype=tf.bool),
                                tf.cast(bev_chances, dtype=tf.bool))
     third_flip = tf.cast(third_flip, dtype=tf.float32)

     # Make a second choice, for the third case
     # Here we use a 50/50 chance to keep either image or bev
     # If its greater than 0.5, keep the image
     img_second_flip = tf.case([(tf.greater(random_values[2], 0.5),
                                 keep_branch)],
                               default=kill_branch)
     # If its less than or equal to 0.5, keep bev.
     # The two conditions are complementary, so exactly one of the two
     # second-flip masks is 1.0.
     bev_second_flip = tf.case([(tf.less_equal(random_values[2], 0.5),
                                 keep_branch)],
                               default=kill_branch)

     # Use lambda since this returns another condition and it needs to
     # be callable.
     # When at least one branch survived (third_flip == 1) the initial
     # decisions stand; otherwise the 50/50 second flip decides.
     final_img_mask = tf.case([(tf.equal(third_flip, 1),
                                lambda: img_chances)],
                              default=lambda: img_second_flip)

     final_bev_mask = tf.case([(tf.equal(third_flip, 1),
                                lambda: bev_chances)],
                              default=lambda: bev_second_flip)

     return final_img_mask, final_bev_mask

2 数据融合到rpn，也就是利用卷积网络对其进行分类（二分类：背景和物体）和回归

 # Continuation of the graph-building code: fuse the BEV and image ROI crops,
 # then predict objectness and box offsets for every anchor.
 with tf.variable_scope('proposal_roi_fusion'):
     rpn_fusion_out = None
     if self._fusion_method == 'mean':
         # Element-wise sum divided by fusion_mean_div_factor, i.e. the mean
         # of the contributing branches.
         tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
         rpn_fusion_out = tf.divide(tf_features_sum,
                                    fusion_mean_div_factor)
     elif self._fusion_method == 'concat':
         # Stack the two crops along axis 3.
         rpn_fusion_out = tf.concat(
             [bev_proposal_rois, img_proposal_rois], axis=3)
     else:
         raise ValueError('Invalid fusion method', self._fusion_method)

 # TODO: move this section into an separate AnchorPredictor class
 with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
     # The fused crops are the input of both prediction heads.
     tensor_in = rpn_fusion_out

     # Parse rpn layers config
     layers_config = self._config.layers_config.rpn_config
     # NOTE(review): the original annotation gives 0.0005 for the decay,
     # 256 for the fc6/fc7 sizes and 0.5 for keep_prob - confirm against
     # the config.
     l2_weight_decay = layers_config.l2_weight_decay

     # Only regularize the weights when a positive decay is configured.
     if l2_weight_decay > 0:
         weights_regularizer = slim.l2_regularizer(l2_weight_decay)
     else:
         weights_regularizer = None

     with slim.arg_scope([slim.conv2d],
                         weights_regularizer=weights_regularizer):
         # Use conv2d instead of fully_connected layers.
         # The kernel is the ROI crop size and padding is 'VALID', so the
         # convolution covers a whole crop at once, like a fully connected
         # layer would.
         cls_fc6 = slim.conv2d(tensor_in,
                               layers_config.cls_fc6,
                               self._proposal_roi_crop_size,
                               padding='VALID',
                               scope='cls_fc6')

         cls_fc6_drop = slim.dropout(cls_fc6,
                                     layers_config.keep_prob,
                                     is_training=self._is_training,
                                     scope='cls_fc6_drop')

         cls_fc7 = slim.conv2d(cls_fc6_drop,
                               layers_config.cls_fc7,
                               [1, 1],
                               scope='cls_fc7')

         cls_fc7_drop = slim.dropout(cls_fc7,
                                     layers_config.keep_prob,
                                     is_training=self._is_training,
                                     scope='cls_fc7_drop')

         # Two outputs with no activation: the classification logits
         # (object vs. background, per the original annotation).
         cls_fc8 = slim.conv2d(cls_fc7_drop,
                               2,
                               [1, 1],
                               activation_fn=None,
                               scope='cls_fc8')

         # Drop the two size-1 spatial dimensions.
         objectness = tf.squeeze(
             cls_fc8, [1, 2],
             name='cls_fc8/squeezed')

         # Use conv2d instead of fully_connected layers.
         reg_fc6 = slim.conv2d(tensor_in,
                               layers_config.reg_fc6,
                               self._proposal_roi_crop_size,
                               padding='VALID',
                               scope='reg_fc6')

         reg_fc6_drop = slim.dropout(reg_fc6,
                                     layers_config.keep_prob,
                                     is_training=self._is_training,
                                     scope='reg_fc6_drop')

         reg_fc7 = slim.conv2d(reg_fc6_drop,
                               layers_config.reg_fc7,
                               [1, 1],
                               scope='reg_fc7')

         reg_fc7_drop = slim.dropout(reg_fc7,
                                     layers_config.keep_prob,
                                     is_training=self._is_training,
                                     scope='reg_fc7_drop')

         # Six regression outputs with no activation. Per the original
         # annotation: centre offsets (dt_x, dt_y, dt_z) and size offsets
         # (dd_x, dd_y, dd_z).
         reg_fc8 = slim.conv2d(reg_fc7_drop,
                               6,
                               [1, 1],
                               activation_fn=None,
                               scope='reg_fc8')

         # Drop the two size-1 spatial dimensions.
         offsets = tf.squeeze(
             reg_fc8, [1, 2],
             name='reg_fc8/squeezed')

AVOD-代码理解系列（三）相关推荐

Lossless Codec---APE代码解读系列(二)
APE file 一些概念 APE代码解读系列(一) APE代码解读系列(三) 1. 先要了解APE compression level APE主要有5level, 分别是: CompressionL ...
【k-匿名（k-Anonymity）代码实现】差分隐私代码实现系列（三）
差分隐私代码实现系列(三) 写在前面的话回顾 k-匿名(k-Anonymity) 检查k-匿名(Checking for k-Anonymity) 生成满足k-匿名的数据(Gen ...
阵列信号DOA估计系列(三).MVDR/Capon波束形成(附代码)
标题阵列信号DOA估计系列(三).MVDR/Capon波束形成 MVDR算法得基本思路是在频域/空间形成一个窄带滤波器,从此出发,可见MVDR不但对噪声有抑制作用,来对观察频率/角度之外的信号有抑制作 ...
Python 字符串系列三字符串的拼接拆分和判断【简单易懂，代码可以直接运行，强烈推荐】
Python 字符串系列三字符串的拼接拆分和判断[简单易懂,代码可以直接运行,强烈推荐] ''' 字符串的拼接和拆分 #拆分和拼接在''中\是转义字符,只有\才表示一个 ''' #以文件路径为例 p ...
后端如何发出请求_gRPC系列(三) 如何借助HTTP2实现传输
本系列分为四大部分: gRPC系列(一) 什么是RPC? gRPC系列(二) 如何用Protobuf组织内容 gRPC系列(三) 如何借助HTTP2实现传输 gRPC系列(四) 框架如何赋能分布式系统 ...
重学JavaScript深入理解系列（一）
JavaScript深入理解--执行上下文(Execution Context) 定义每当控制器到达ECMAScript可执行代码的时候,控制器就进入了一个执行上下文. 执行上下文(简称:EC) 以 ...
Android Studio 单刷《第一行代码》系列 04 —— Activity 相关
前情提要(Previously) 本系列将使用 Android Studio 将<第一行代码>(书中讲解案例使用Eclipse)刷一遍,旨在为想入坑 Android 开发,并选择 Andr ...
VSTO之旅系列(三)：自定义Excel UI
VSTO之旅系列(三):自定义Excel UI 原文:VSTO之旅系列(三):自定义Excel UI 本专题概要引言自定义任务窗体(Task Pane) 自定义选项卡,即Ribbon 自定义上下文 ...
[系统安全] 四十六.Powershell恶意代码检测系列 (1)Powershell基础入门及管道和变量的用法
您可能之前看到过我写的类似文章,为什么还要重复撰写呢?只是想更好地帮助初学者了解病毒逆向分析和系统安全,更加成体系且不破坏之前的系列.因此,我重新开设了这个专栏,准备系统整理和深入学习系统安全.逆向分 ...
重学JavaScript深入理解系列（六）
JavaScript深入理解--闭包(Closures) 概要本文将介绍一个在JavaScript经常会拿来讨论的话题 -- 闭包(closure).闭包其实已经是个老生常谈的话题了: 有大量文章都 ...

AVOD-代码理解系列（三）

AVOD-代码理解

从输入到数据融合之前

2 数据融合到rpn，也就是利用卷积网络对其进行分类（二分类：背景和物体）和回归

AVOD-代码理解系列（三）相关推荐

最新文章

热门文章