Paper: http://jeffdonahue.com/lrcn/
Code: the “lstm_video_deploy” branch of Lisa Anne Hendricks’s Caffe fork

Python Layer

train_test_lstm_RGB.prototxt

name: "lstm_joints"
layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  top: "clip_markers"
  python_param {
    module: "sequence_input_layer"
    layer: "videoReadTrain_RGB"
  }
  include: { phase: TRAIN }
}

layer_factory.cpp

template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPythonLayer(const LayerParameter& param) {Py_Initialize();try {// open "sequence_input_layer.py"bp::object module = bp::import(param.python_param().module().c_str());//class videoReadTrain_RGB(videoRead)对象bp::object layer = module.attr(param.python_param().layer().c_str())(param);// extract<T> can be used to extract a value of an arbitrary C++ type from an instance Of objectreturn bp::extract<shared_ptr<PythonLayer<Dtype> > >(layer)();} catch (bp::error_already_set) {PyErr_Print();throw;}
}REGISTER_LAYER_CREATOR(Python, GetPythonLayer);

python_layer.cpp

// The Python layer does its main work in LayerSetUp and Forward.
/**
 * Layer whose behaviour is implemented by a Python object.
 *
 * Each virtual method forwards to the same-named method on the Python object
 * ("setup", "reshape", "forward", "backward"). If the Python call raises, the
 * traceback is printed with PyErr_Print() and the bp::error_already_set
 * exception is re-thrown.
 */
template <typename Dtype>
class PythonLayer : public Layer<Dtype> {
 public:
  /// @param self   The Python object implementing the layer (stored, not
  ///               reference-counted here).
  /// @param param  Layer definition passed on to the Layer base class.
  PythonLayer(PyObject* self, const LayerParameter& param)
      : Layer<Dtype>(param), self_(self) { }

  /// Initialization: calls the Python object's setup(bottom, top).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "setup", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  /// Calls the Python object's reshape(bottom, top).
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "reshape", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  virtual inline const char* type() const { return "Python"; }

 protected:
  /// Calls the Python object's forward(bottom, top).
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "forward", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  /// Calls the Python object's backward(top, propagate_down, bottom).
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    try {
      bp::call_method<bp::object>(self_, "backward", top, propagate_down,
          bottom);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

 private:
  PyObject* self_;
};

sequence_input_layer.py

RGB_frames = 'RGBframes/'
test_frames = 16
train_frames = 16
test_buffer = 3
train_buffer = 24


class videoReadTrain_RGB(videoRead):
  """Training-phase RGB configuration for the videoRead input layer."""

  def initialize(self):
    """Set the attributes that videoRead.setup() reads."""
    # Where the data comes from.
    self.train_or_test = 'train'
    self.flow = False
    self.path_to_images = RGB_frames
    # One entry per line: "<event>/<video of that event> <event label>", e.g.
    #   TableTennisShot/v_TableTennisShot_g19_c03 89
    #   MilitaryParade/v_MilitaryParade_g09_c06 52
    #   RopeClimbing/v_RopeClimbing_g16_c01 74
    self.video_list = 'ucf101_split1_trainVideos.txt'

    # Number of videos processed per batch (24). Can be lowered if GPU
    # memory is insufficient.
    self.buffer_size = train_buffer
    # Length of the processed clip (16): that many consecutive frames are
    # taken from each video. Per the original author's note this value must
    # not be changed, because other code hard-codes 16.
    self.frames = train_frames
    self.N = self.buffer_size*self.frames
    self.idx = 0

    # Shape of a single network input frame.
    self.channels = 3
    self.height = 227
    self.width = 227

class videoRead(caffe.Layer):
  """Caffe Python data layer producing 'data', 'label' and 'clip_markers'.

  Subclasses implement initialize(), which must set the attributes read
  here: video_list, path_to_images, buffer_size, frames, N, channels,
  height, width and flow.

  Batches are prepared by a background thread (see dispatch_worker /
  join_worker): forward() waits for the pending batch, copies it into the
  top blobs and immediately starts preparing the next one.
  """

  def setup(self, bottom, top):
    """Read the video list, build the transformer and shape the top blobs.

    Raises:
      Exception: if a listed video has no .jpg frames, or if the number of
        top blobs differs from the three outputs this layer produces.
    """
    # Note: the fixed seed guarantees that the generated random sequence is
    # the same every time the experiment is re-run.
    random.seed(10)
    self.initialize()
    with open(self.video_list, 'r') as f:
      f_lines = f.readlines()

    video_dict = {}
    self.video_order = []
    for line in f_lines:
      # line: "TableTennisShot/v_TableTennisShot_g19_c03 89"
      fields = line.split(' ')
      video = fields[0].split('/')[1]  # v_TableTennisShot_g19_c03
      l = int(fields[1])  # 89
      # All jpg images under e.g. RGBframes/v_TableTennisShot_g19_c03
      frames = glob.glob('%s%s/*.jpg' %(self.path_to_images, video))
      if not frames:
        # Fail with a clear message instead of an IndexError on frames[0].
        raise Exception('No .jpg frames found for video %s under %s' %
                        (video, self.path_to_images))
      num_frames = len(frames)
      video_dict[video] = {}
      # Pattern for a frame path: everything before the first '.' of the
      # first matched file, followed by '.%04d.jpg'. The frame number is
      # substituted later by sequenceGeneratorVideo.
      video_dict[video]['frames'] = frames[0].split('.')[0] + '.%04d.jpg'
      video_dict[video]['reshape'] = (240,320)
      video_dict[video]['crop'] = (227, 227)
      video_dict[video]['num_frames'] = num_frames
      video_dict[video]['label'] = l
      self.video_order.append(video)

    self.video_dict = video_dict
    self.num_videos = len(video_dict)

    # Set up the data transformer.
    shape = (self.N, self.channels, self.height, self.width)
    self.transformer = caffe.io.Transformer({'data_in': shape})
    self.transformer.set_raw_scale('data_in', 255)
    if self.flow:
      image_mean = [128, 128, 128]
      self.transformer.set_is_flow('data_in', True)
    else:
      image_mean = [103.939, 116.779, 128.68]
      self.transformer.set_is_flow('data_in', False)
    # Three 227x227 planes, initially all zero; plane k is then filled with
    # image_mean[k] (103.939, 116.779, 128.68 in the RGB case).
    channel_mean = np.zeros((3,227,227))
    for channel_index, mean_val in enumerate(image_mean):
      channel_mean[channel_index, ...] = mean_val
    self.transformer.set_mean('data_in', channel_mean)
    self.transformer.set_channel_swap('data_in', (2, 1, 0))
    self.transformer.set_transpose('data_in', (2, 0, 1))

    self.thread_result = {}
    self.thread = None
    pool_size = 24

    self.image_processor = ImageProcessorCrop(self.transformer, self.flow)
    self.sequence_generator = sequenceGeneratorVideo(self.buffer_size, self.frames, self.num_videos, self.video_dict, self.video_order)

    self.pool = Pool(processes=pool_size)
    self.batch_advancer = BatchAdvancer(self.thread_result, self.sequence_generator, self.image_processor, self.pool)

    # Start preparing the first batch in the background.
    self.dispatch_worker()

    self.top_names = ['data', 'label','clip_markers']
    # Same output as the Python 2 statement `print 'Outputs:', self.top_names`.
    print('Outputs: %s' % (self.top_names,))
    if len(top) != len(self.top_names):
      raise Exception('Incorrect number of outputs (expected %d, got %d)' %
                      (len(self.top_names), len(top)))

    # Wait for the first batch before shaping the tops.
    self.join_worker()

    for top_index, name in enumerate(self.top_names):
      if name == 'data':
        shape = (self.N, self.channels, self.height, self.width)
      elif name == 'label':
        shape = (self.N,)  # one-element tuple
      elif name == 'clip_markers':
        shape = (self.N,)  # one-element tuple
      # '*' unpacks the tuple into positional arguments. With
      # shape = (N, channels, height, width) the blob becomes N 3-D arrays,
      # each made of `channels` matrices of `height` x `width`.
      top[top_index].reshape(*shape)

  def reshape(self, bottom, top):
    """Nothing to do: the top shapes are fixed in setup()."""
    pass

  def forward(self, bottom, top):
    """Copy the prepared batch into the tops and start preparing the next."""
    if self.thread is not None:
      self.join_worker()

    # Rearrange the data: the LSTM takes inputs as
    # [video0_frame0, video1_frame0, ...] but the data is currently arranged
    # as [video0_frame0, video0_frame1, ...].
    new_result_data = [None]*len(self.thread_result['data'])
    new_result_label = [None]*len(self.thread_result['label'])
    new_result_cm = [None]*len(self.thread_result['clip_markers'])
    for i in range(self.frames):
      for ii in range(self.buffer_size):
        old_idx = ii*self.frames + i
        new_idx = i*self.buffer_size + ii
        new_result_data[new_idx] = self.thread_result['data'][old_idx]
        new_result_label[new_idx] = self.thread_result['label'][old_idx]
        new_result_cm[new_idx] = self.thread_result['clip_markers'][old_idx]

    for top_index, name in zip(range(len(top)), self.top_names):
      if name == 'data':
        for i in range(self.N):
          top[top_index].data[i, ...] = new_result_data[i]
      elif name == 'label':
        top[top_index].data[...] = new_result_label
      elif name == 'clip_markers':
        top[top_index].data[...] = new_result_cm

    self.dispatch_worker()

  def dispatch_worker(self):
    """Start a background thread that prepares the next batch."""
    assert self.thread is None
    # self.batch_advancer is a BatchAdvancer instance, so the thread invokes
    # its __call__, which per the original author's note runs advance_batch.
    self.thread = Thread(target=self.batch_advancer)
    self.thread.start()

  def join_worker(self):
    """Block until the background thread has finished, then clear it."""
    assert self.thread is not None
    # join() blocks the calling thread until the joined thread terminates.
    self.thread.join()
    self.thread = None

  def backward(self, top, propagate_down, bottom):
    """Data layer: nothing to back-propagate."""
    pass

def advance_batch(result, sequence_generator, image_processor, pool, clip_length=16):
  """Prepare one batch and store it in `result`.

  Args:
    result: dict that receives the keys 'data', 'label' and 'clip_markers'.
    sequence_generator: callable returning (labels, im_info); in this file a
      sequenceGeneratorVideo instance, whose __call__ is invoked.
    image_processor: callable applied to every element of im_info.
    pool: object with a map(func, iterable) method (a multiprocessing Pool).
    clip_length: number of consecutive entries that belong to one video.
      Defaults to 16, the value that was previously hard-coded.
  """
  label_r, im_info = sequence_generator()
  # Kept from the original: the first item is also processed once in the
  # calling thread and the result discarded.
  # NOTE(review): presumably this surfaces processing errors outside the
  # pool - confirm before removing.
  image_processor(im_info[0])
  result['data'] = pool.map(image_processor, im_info)
  result['label'] = label_r
  # Clip markers separate the videos: with 24 videos of 16 frames this is a
  # 24x16 matrix of ones whose first column is zero, flattened row by row.
  cm = np.ones(len(label_r))
  cm[0::clip_length] = 0
  result['clip_markers'] = cm

class sequenceGeneratorVideo(object):
  """Selects the videos and frames that make up each batch.

  Every call returns the labels and per-frame processing info for
  `buffer_size` videos of `clip_length` consecutive frames each, and
  advances an internal cursor so that successive calls walk through
  `video_order`, wrapping around at the end.
  """

  def __init__(self, buffer_size, clip_length, num_videos, video_dict, video_order):
    """
    Args:
      buffer_size: number of videos per batch.
      clip_length: number of consecutive frames taken from each video.
      num_videos: number of entries in video_order.
      video_dict: maps a video key to a dict with the keys 'label',
        'reshape', 'crop', 'num_frames' and 'frames' (a '%'-format pattern
        taking the frame number).
      video_order: list of video keys in processing order.
    """
    self.buffer_size = buffer_size
    self.clip_length = clip_length
    self.N = self.buffer_size*self.clip_length
    self.num_videos = num_videos
    self.video_dict = video_dict
    self.video_order = video_order
    self.idx = 0

  def __call__(self):
    """Return (label_r, im_info) for the next batch.

    label_r holds one label per frame. im_info is a list of
    (path, crop, reshape, flip) tuples, one per frame, where crop is
    (r0, r1, r0 + crop_h, r1 + crop_w). Crop offsets, flip flag and start
    frame are drawn once per video and shared by all of its frames.
    """
    label_r = []
    im_paths = []
    im_crop = []
    im_reshape = []
    im_flip = []

    # list(range(...)) keeps this working on Python 3, where range objects
    # have no extend(); on Python 2 it is a plain copy.
    if self.idx + self.buffer_size >= self.num_videos:
      # Wrap around: take the tail of the list, then continue from the start.
      idx_list = list(range(self.idx, self.num_videos))
      idx_list.extend(range(0, self.buffer_size-(self.num_videos-self.idx)))
    else:
      # (train) buffer_size = 24 -> 24 videos
      idx_list = list(range(self.idx, self.idx+self.buffer_size))

    for i in idx_list:
      key = self.video_order[i]
      label = self.video_dict[key]['label']
      video_reshape = self.video_dict[key]['reshape']
      video_crop = self.video_dict[key]['crop']
      # clip_length = 16, so 16 copies of the label are appended.
      label_r.extend([label]*self.clip_length)

      im_reshape.extend([(video_reshape)]*self.clip_length)
      r0 = int(random.random()*(video_reshape[0] - video_crop[0]))
      r1 = int(random.random()*(video_reshape[1] - video_crop[1]))
      im_crop.extend([(r0, r1, r0+video_crop[0], r1+video_crop[1])]*self.clip_length)
      f = random.randint(0,1)
      im_flip.extend([f]*self.clip_length)
      rand_frame = int(random.random()*(self.video_dict[key]['num_frames']-self.clip_length)+1+1)
      # clip_length (16) consecutive frame paths starting at rand_frame.
      frames = []
      for frame_no in range(rand_frame, rand_frame+self.clip_length):
        frames.append(self.video_dict[key]['frames'] % frame_no)
      # After the outer loop im_paths holds 24x16 paths.
      im_paths.extend(frames)

    # zip([1,2,3], [4,5,6]) pairs up elements: [(1, 4), (2, 5), (3, 6)].
    # Wrapped in list() because the caller indexes the result, which a
    # Python 3 zip iterator does not support.
    im_info = list(zip(im_paths, im_crop, im_reshape, im_flip))

    # Advance the cursor so that every call visits different videos.
    self.idx += self.buffer_size
    if self.idx >= self.num_videos:
      self.idx = self.idx - self.num_videos

    return label_r, im_info

python layer-> image_data_layer

第一层python层是不支持multi-gpu的….因为lock会出问题.
所以来改image_data_layer作为数据第一层吧.

train_test_lstm_RGB.prototxt

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  top: "clip_markers"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_value: 103.939
    mean_value: 116.779
    mean_value: 128.68
  }
  image_data_param {
    source: "ucf101_split1_trainVideos.txt"
    batch_size: 32  # 16*2
    new_height: 240
    new_width: 320
    root_folder: "/work/na"
  }
}
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  top: "clip_markers"
  include {
    phase: TEST
    stage: "test-on-test"
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_value: 103.939
    mean_value: 116.779
    mean_value: 128.68
  }
  image_data_param {
    source: "ucf101_split1_testVideos.txt"
    batch_size: 48
    new_height: 240
    new_width: 320
    root_folder: "/work/na"
  }
}

base_data_layer.hpp

// Excerpt: only the members relevant to the clip-marker change are shown.

/// Base class of the data layers; gains a flag for the clip-marker output.
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 protected:
  /// Whether the layer produces a clip-marker top blob in addition to
  /// data and labels.
  bool output_clip_markers_;
};

/// One prefetched batch: data, labels and the new clip markers.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_;
  Blob<Dtype> label_;
  Blob<Dtype> clip_markers_;
};

base_data_layer.cpp

/**
 * Decide which optional outputs the layer produces from the number of tops:
 * one top is data only, two tops add labels, three tops add clip markers.
 *
 * Both flags are now always assigned. Previously they were left unset when
 * top.size() was not 1, 2 or 3.
 */
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  output_labels_ = top.size() >= 2;
  output_clip_markers_ = top.size() >= 3;
}

/**
 * Run the base set-up, then touch every prefetch buffer once on the CPU
 * (and, in GPU mode, on the GPU) via mutable_cpu_data()/mutable_gpu_data().
 * NOTE(review): presumably this forces allocation before the prefetch
 * thread starts, as in stock Caffe - confirm.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_[i].data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i].label_.mutable_cpu_data();
    }
    if (this->output_clip_markers_) {
      prefetch_[i].clip_markers_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < PREFETCH_COUNT; ++i) {
      prefetch_[i].data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i].label_.mutable_gpu_data();
      }
      if (this->output_clip_markers_) {
        prefetch_[i].clip_markers_.mutable_gpu_data();
      }
    }
  }
#endif
}

/**
 * Copy a prefetched batch into the top blobs on the CPU: labels into top[1]
 * and clip markers into top[2] when those outputs are enabled. The batch is
 * then returned to the free queue.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // ... (the start of the function, which obtains `batch` and copies the
  // data blob, is elided in this excerpt)
  if (this->output_labels_) {
    // Reshape to loaded labels.
    top[1]->ReshapeLike(batch->label_);
    // Copy the labels.
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
        top[1]->mutable_cpu_data());
  }
  if (this->output_clip_markers_) {
    // Reshape to loaded clip markers, then copy them.
    top[2]->ReshapeLike(batch->clip_markers_);
    caffe_copy(batch->clip_markers_.count(), batch->clip_markers_.cpu_data(),
        top[2]->mutable_cpu_data());
  }
  prefetch_free_.push(batch);
  /* Debug dump of every top blob except the data blob:
  LOG(INFO) << top.size();
  for (int i = 1; i < top.size(); ++i) {
    LOG(INFO) << "top data " << i;
    const Dtype *top_cpu_data = top[i]->cpu_data();
    for (int j = 0; j < top[i]->count(); ++j) {
      LOG(INFO) << top_cpu_data[j];
    }
  }
  */
}

image_data_layer.hpp

// Excerpt: only the members touched by the clip-marker change are shown.
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  /// The layer requires exactly three top blobs.
  virtual inline int ExactNumTopBlobs() const { return 3; }

 protected:
  /// Random integer helper, defined in image_data_layer.cpp.
  int Rand(int n);
};

image_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/contrib/contrib.hpp>

namespace caffe {

// Number of consecutive frames taken from each video (the clip length).
// This was previously the literal 16 scattered through this file.
const int kClipLength = 16;

/**
 * List the files in a directory.
 *
 * @param ImagePath  Directory to list.
 * @param fileNames  Receives the file names, sorted lexicographically.
 *
 * The result is sorted because callers treat consecutive entries as
 * consecutive video frames, and directory enumeration order is not
 * guaranteed to be sorted.
 */
void returnImageList(string ImagePath, vector<string>& fileNames) {
  cv::Directory dir;
  fileNames = dir.GetListFiles(ImagePath, "*", false);
  std::sort(fileNames.begin(), fileNames.end());
}

/**
 * Strip everything up to and including the first '/' of @p filename,
 * e.g. "TableTennisShot/v_TableTennisShot_g19_c03" becomes
 * "v_TableTennisShot_g19_c03".
 *
 * A name without '/' is returned unchanged. Previously the npos result was
 * narrowed to -1 and the last character of the name was dropped.
 */
string fileparts(string filename) {
  const string::size_type slash = filename.find_first_of("/");
  if (slash == string::npos) {
    return filename;
  }
  return filename.substr(slash + 1);
}

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
  this->StopInternalThread();
}

/**
 * Read the list file, initialise the prefetch RNG and shape the top blobs
 * and prefetch buffers (data, label and, when enabled, clip markers).
 *
 * With clip markers enabled each list entry names a video; its frames are
 * read from the directory root_folder/<part after the first '/'>.
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();
  const int batch_size = this->layer_param_.image_data_param().batch_size();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label;
  while (infile >> filename >> label) {
    lines_.push_back(std::make_pair(filename, label));
  }
  // lines_[lines_id_] is dereferenced below; fail early on an empty list.
  CHECK(!lines_.empty()) << "File is empty";

  // Rand() needs the prefetch RNG even without shuffling, so it is always
  // created, and created BEFORE ShuffleImages(), which dereferences it.
  // Previously the shuffle branch ran first, with the RNG still unset.
  const unsigned int prefetch_rng_seed = caffe_rng_rand();
  prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }

  string imagePath = root_folder + "/" + lines_[lines_id_].first;
  vector<string> fileNames;
  if (this->output_clip_markers_) {
    // The batch is filled clip by clip; a partial clip would leave part of
    // the batch unwritten.
    CHECK_EQ(batch_size % kClipLength, 0)
        << "batch_size must be a multiple of the clip length";
    // Use the first frame of the first video to infer the blob shape.
    imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
    returnImageList(imagePath, fileNames);
    CHECK(!fileNames.empty()) << "No frames found in " << imagePath;
    imagePath = imagePath + "/" + fileNames[0];

    vector<int> clipmarkers_shape(1, batch_size);
    top[2]->Reshape(clipmarkers_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].clip_markers_.Reshape(clipmarkers_shape);
    }
  }

  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(imagePath,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << imagePath;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}

/// Shuffle the list entries with the prefetch RNG.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// NOTE(review): this is a DataTransformer member shown alongside the layer
// code; presumably it lives in data_transformer.cpp - confirm.
/// Create the transformer RNG when mirroring is on, or when cropping in the
/// TRAIN phase; otherwise release it.
template <typename Dtype>
void DataTransformer<Dtype>::InitRand() {
  const bool needs_rand = param_.mirror() ||
      (phase_ == TRAIN && param_.crop_size());
  if (needs_rand) {
    const unsigned int rng_seed = caffe_rng_rand();
    rng_.reset(new Caffe::RNG(rng_seed));
  } else {
    rng_.reset();
  }
}

/// Draw a value in [0, n) from the prefetch RNG. Requires n > 0 and an
/// initialised prefetch_rng_.
template <typename Dtype>
int ImageDataLayer<Dtype>::Rand(int n) {
  CHECK(prefetch_rng_);
  CHECK_GT(n, 0);
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  return ((*prefetch_rng)() % n);
}

/**
 * Fill one prefetch batch. This function is called on the prefetch thread.
 *
 * Clip-marker mode: batch_size / kClipLength videos are loaded, each
 * contributing kClipLength consecutive frames starting at a random frame.
 * Frames are interleaved so that frame t of video v lands at index
 * tbuffer * t + v. The clip marker is 0 for the first frame of a clip and 1
 * for the others. The first frame is transformed with changeCrop = true and
 * the remaining frames with changeCrop = false.
 *
 * Otherwise: one image per list entry, as in the stock ImageData layer.
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  double dir_time = 0;
  double idx_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  string imagePath = root_folder + "/" + lines_[lines_id_].first;
  vector<string> fileNames;
  int tbuffer = 0;  // number of videos (clips) per batch
  if (this->output_clip_markers_) {
    tbuffer = batch_size / kClipLength;
    imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
    returnImageList(imagePath, fileNames);
    CHECK(!fileNames.empty()) << "No frames found in " << imagePath;
    imagePath = imagePath + "/" + fileNames[0];
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(imagePath,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << imagePath;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  if (this->output_clip_markers_) {
    Dtype* prefetch_clip_markers = batch->clip_markers_.mutable_cpu_data();
    for (int item_id = 0; item_id < tbuffer; ++item_id) {
      // get a blob
      CHECK_GT(lines_size, lines_id_);
      timer.Start();
      imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
      returnImageList(imagePath, fileNames);
      dir_time += timer.MicroSeconds();

      // A full clip must fit; otherwise the unsigned size arithmetic that
      // used to feed Rand() would wrap around.
      const int num_frames = static_cast<int>(fileNames.size());
      CHECK_GE(num_frames, kClipLength)
          << "Too few frames in " << imagePath;
      int randID = Rand(num_frames - kClipLength + 1);
      DLOG(INFO) << imagePath<<"-randID:"<<randID;

      string imagePath1;
      for (int image_id = randID; image_id < kClipLength + randID;
           ++image_id) {
        timer.Start();
        imagePath1 = imagePath + "/" + fileNames[image_id];
        cv::Mat frame = ReadImageToCVMat(imagePath1,
            new_height, new_width, is_color);
        read_time += timer.MicroSeconds();
        CHECK(frame.data) << "Could not load " << imagePath1;

        timer.Start();
        /* Position in the batch, e.g. for tbuffer = 4
         * (rows: item_id, columns: frame index within the clip):
         *      0   1   2   3   4  ...  15
         *  0/  0   4   8  12  16  ...  60
         *  1/  1   5   9  13  17  ...  61
         *  2/  2   6  10  14  18  ...  62
         *  3/  3   7  11  15  19  ...  63
         */
        int imgPosition = tbuffer * (image_id - randID) + item_id;
        int offset = batch->data_.offset(imgPosition);
        // Apply transformations (mirror, crop...) to the image
        this->transformed_data_.set_cpu_data(prefetch_data + offset);
        prefetch_label[imgPosition] = lines_[lines_id_].second;
        if (image_id == randID) {
          // First frame of the clip: changeCrop = true, marker 0.
          this->data_transformer_->Transform(frame,
              &(this->transformed_data_), true);
          prefetch_clip_markers[imgPosition] = 0;
        } else {
          // Later frames: changeCrop = false, marker 1.
          this->data_transformer_->Transform(frame,
              &(this->transformed_data_), false);
          prefetch_clip_markers[imgPosition] = 1;
        }
        idx_time += timer.MicroSeconds();
      }
      // go to the next iter
      lines_id_++;
      if (lines_id_ >= lines_size) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        lines_id_ = 0;
      }
    }
    batch_timer.Stop();
    DLOG(INFO) << "Prefetch  batch: " << batch_timer.MilliSeconds() << " ms.";
    DLOG(INFO) << "Directory time: " << dir_time / 1000 << " ms.";
    DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
    DLOG(INFO) << "ChangeIdx time: " << idx_time / 1000 << " ms.";
  } else {
    for (int item_id = 0; item_id < batch_size; ++item_id) {
      // get a blob
      timer.Start();
      CHECK_GT(lines_size, lines_id_);
      // The "/" separator matches how every other path in this file is
      // built from root_folder.
      cv::Mat image = ReadImageToCVMat(
          root_folder + "/" + lines_[lines_id_].first,
          new_height, new_width, is_color);
      CHECK(image.data) << "Could not load " << lines_[lines_id_].first;
      read_time += timer.MicroSeconds();
      timer.Start();
      // Apply transformations (mirror, crop...) to the image
      int offset = batch->data_.offset(item_id);
      this->transformed_data_.set_cpu_data(prefetch_data + offset);
      this->data_transformer_->Transform(image, &(this->transformed_data_));
      trans_time += timer.MicroSeconds();

      prefetch_label[item_id] = lines_[lines_id_].second;
      // go to the next iter
      lines_id_++;
      if (lines_id_ >= lines_size) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        lines_id_ = 0;
        if (this->layer_param_.image_data_param().shuffle()) {
          ShuffleImages();
        }
      }
    }
    batch_timer.Stop();
    DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
    DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
    DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
  }
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV

data_transformer.hpp

/**
 * Transform a cv::Mat image into a blob.
 *
 * @param cv_img            Source image.
 * @param transformed_blob  Destination blob.
 * @param changeCrop        Defaults to true. In the TRAIN phase, true draws
 *                          new random crop offsets and false reuses the
 *                          offsets of the previous call.
 */
void Transform(const cv::Mat& cv_img,
               Blob<Dtype>* transformed_blob,
               bool changeCrop = true);

data_transformer.cpp

/**
 * Excerpt of the cv::Mat transform showing only the crop-offset selection.
 *
 * TRAIN phase: changeCrop == true draws fresh random offsets and remembers
 * them in former_h_off / former_w_off; changeCrop == false reuses the
 * remembered offsets. TEST phase: the crop is centred.
 */
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
                                       Blob<Dtype>* transformed_blob,
                                       bool changeCrop) {
  // ... (beginning of the function elided in this excerpt)
  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // We only do random crop when we do training.
    if (phase_ == TRAIN) {
      if (changeCrop) {
        // New random offsets, remembered for the following calls.
        h_off = Rand(img_height - crop_size + 1);
        w_off = Rand(img_width - crop_size + 1);
        former_h_off = h_off;
        former_w_off = w_off;
      } else {
        // Same crop window as the previous call.
        h_off = former_h_off;
        w_off = former_w_off;
      }
    } else if (phase_ == TEST) {
      h_off = (img_height - crop_size) / 2;
      w_off = (img_width - crop_size) / 2;
    }
    DLOG(INFO)<<h_off<<","<<w_off;
    cv::Rect roi(w_off, h_off, crop_size, crop_size);
    cv_cropped_img = cv_img(roi);
  } else {
    CHECK_EQ(img_height, height);
    CHECK_EQ(img_width, width);
  }
  // ... (rest of the function elided in this excerpt)
}

Other changes

net.cpp

template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {/*CHECK(Caffe::root_solver() || root_net_)<< "root_net_ needs to be set for all non-root solvers";*/bool isLSTMLayer=false;if(!Caffe::root_solver() && root_net_==NULL)isLSTMLayer = true;// Set phase from the state.//....for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {// For non-root solvers, whether this layer is shared from root_net_.bool share_from_root;if(isLSTMLayer)share_from_root = false;elseshare_from_root = !Caffe::root_solver()&& root_net_->layers_[layer_id]->ShareInParallel();// Inherit phase from net if unset.// ...

base_data_layer.cu

#include <vector>#include "caffe/layers/base_data_layer.hpp"namespace caffe {template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");// Reshape to loaded data.top[0]->ReshapeLike(batch->data_);// Copy the datacaffe_copy(batch->data_.count(), batch->data_.gpu_data(),top[0]->mutable_gpu_data());if (this->output_labels_) {// Reshape to loaded labels.top[1]->ReshapeLike(batch->label_);// Copy the labels.caffe_copy(batch->label_.count(), batch->label_.gpu_data(),top[1]->mutable_gpu_data());}if (this->output_clip_markers_) {top[2]->ReshapeLike(batch->clip_markers_);caffe_copy(batch->clip_markers_.count(), batch->clip_markers_.gpu_data(),top[2]->mutable_gpu_data());}// Ensure the copy is synchronous wrt the host, so that the next batch isn't// copied in meanwhile.CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));prefetch_free_.push(batch);
}INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer);}  // namespace caffe

caffe.proto

// Excerpt: only the normalization-related fields are shown.
message LossParameter {
  // ...
  optional NormalizationMode normalization = 3;
  // `normalize` carries an explicit default of false here.
  optional bool normalize = 2 [default = false];
}

recurrent layer

inheritance

[caffe] Long-term Recurrent Convolutional Networks相关推荐

long term recurrent convolutional networks for visual recognition and description
这篇属于很早就探索cnn+rnn解决high-level computer vision task的文章 Abstract 基于深度卷积网络的模型已经在最近的图像解释任务中成为主流,在这里我们研究了是 ...
《Long-term Recurrent Convolutional Networks for Visual Recognition and Description》论文翻译
<Long-term Recurrent Convolutional Networks for Visual Recognition and Description>论文翻译原文链接: ...
【Paper】CNN-LSTM：Long-term Recurrent Convolutional Networks for Visual Recognition and Description
论文期刊:CVPR 2015 (oral) 论文被引:3673 (04/24/20) 论文原文:点击此处该论文是 CNN-LSTM 的开山鼻祖,主要用于生成图像描述.初稿发布于2014年,拿到了 C ...
Shape Inpainting using 3D Generative Adversarial Network and Recurrent Convolutional Networks
摘要卷积神经网络的最新进展已显示出有希望的3D形状完成结果. 由于GPU内存的限制,这些方法只能产生低分辨率的输出. 为了用语义上的合理性和上下文详细信息修补3D模型,我们引入了一个混合框架,该框架 ...
Long-term Recurrent Convolutional Networks for Visual Recognition and Description
视觉识别和描述的长期递归卷积网络摘要:基于深度卷积网络的模型主导了最近的图像解释任务.我们调查了也经常使用的模型是否对涉及序列,视觉和其他方面的任务有效.我们描述了一类递归卷积体系结构,它是端到端可 ...
【多标签文本分类】Ensemble Application of Convolutional and Recurrent Neural Networks for Multi-label Text
·阅读摘要: 本文提出基于Seq2Seq模型,提出CNN-RNN模型应用于多标签文本分类.论文表示CNN-RNN模型在大型数据集上表现的效果很好,在小数据集效果不好. ·参考文献: [1] E ...
【文本分类】Recurrent Convolutional Neural Networks for Text Classification
·摘要: 从模型的角度,本文作者将RNN(Bi-LSTM)和max_pooling结合使用,提出RCNN模型,应用到了NLP的文本分类任务中,提高了分类精度. ·参考文献: [1] Recur ...
Single Channel Speech Enhancement Using Temporal Convolutional Recurrent Neural Networks
Single Channel Speech Enhancement Using Temporal Convolutional Recurrent Neural Networks 标题:基于时域卷积递归 ...
＜笔记＞Long and Short -Term Recommendations with Recurrent Neural Networks
<笔记>Long and Short -Term Recommendations with Recurrent Neural Networks 基于项目的序列神经网络推荐总结: (1)证 ...
Fully Convolutional Networks for semantic Segmentation（深度学习经典论文翻译）
原文链接:https://www.cnblogs.com/xuanxufeng/p/6249834.html 摘要卷积网络在特征分层领域是非常强大的视觉模型.我们证明了经过端到端.像素到像素训练的卷 ...

[caffe] Long-term Recurrent Convolutional Networks

Python Layer

python_layer.cpp

sequence_input_layer.py

python layer-> image_data_layer

train_test_lstm_RGB.prototxt

base_data_layer.hpp

base_data_layer.cpp

image_data_layer.hpp

image_data_layer.cpp

data_transformer.hpp

data_transformer.cpp

Other changes

net.cpp

base_data_layer.cu

caffe.proto

recurrent layer

inheritance

[caffe] Long-term Recurrent Convolutional Networks相关推荐

最新文章

热门文章