终于到了介绍如何使用Siamese网络跑自己的数据了。在网上、论坛上、群里关于Siamese网络的资料很多,但是实战的资料很少,难道是因为太容易了吗?反正博主查阅了各种地方,几乎没有找到Siamese网络实战的资料,即使有零星关于实战的内容,那也是基于Ubuntu系统的,殊不知在Ubuntu系统上跑caffe可要比在Windows上简单得多。所以,就本博主的调研情况来看,这篇博客绝对称得上是Windows平台使用Siamese网络跑自己的数据的第一篇详细资料!这一篇介绍如何利用Windows caffe的Siamese网络跑自己的数据,下一篇打算介绍如何调整和搭建网络结构。

我们知道Siamese网络的输入是一个图像对(image pair),所以首先要把图像数据放到文件夹中,然后建立一个索引文件,索引文件的每一行是用空格分隔的两个图像名(相对于图像根目录的路径),代表一个图像对。样式如下(文件名仅为示例):`img_0001.jpg img_0002.jpg`


/*
 * convertImgToSiamese.cpp
 *
 * Command-line tool that packs pairs of images into caffe::Datum records and
 * stores them in a LevelDB, for use as input to a Siamese network.
 */
#include <algorithm>
#include <fstream>
#include <string>
#include <cstdio>
#include <utility>
#include <vector>
//#include <cstdlib>
#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "leveldb/db.h"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"
//#include "caffe/util/format.hpp"
#include "caffe/util/math_functions.hpp"
#include "opencv2/opencv.hpp"
#include "google/protobuf/text_format.h"
#include "stdint.h"
#include <cstdio>
#include <iostream>
#include <cmath>

// NOTE(review): file-scope using-directives are kept because the rest of the
// file refers to names such as `string`, `cout` and `shuffle` unqualified.
using namespace caffe;
using std::pair;
using boost::scoped_ptr;
using namespace cv;
using namespace std;

// Command-line flags. In the code visible in this file only `shuffle`,
// `resize_width`, `resize_height` and `channel` are read; the remaining flags
// are defined but not consulted.
DEFINE_bool(gray, false, "when this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false, "randomly shuffle the order of images and their labels");
DEFINE_string(backend, "leveldb", "the backend {lmdb, leveldb} for storing the result");
// Both resize flags default to 0, so a usable size has to be supplied on the
// command line (--resize_width / --resize_height).
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,"When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,"When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "","Optional: What type should we encode the image as ('png','jpg',...).");
// Number of channels stored per image of the pair (defaults to 3).
DEFINE_int32(channel, 3, "channel numbers of the image");
static bool ReadImageToMemory(const string &FileName, const int Height, const int Width, char *Pixels)
{//read image//cv::Mat OriginImage = cv::imread(FileName, cv::IMREAD_GRAYSCALE);cv::Mat OriginImage = cv::imread(FileName);     //3. read color imageCHECK(OriginImage.data) << "Failed to read the image.\n";//resize the imagecv::Mat ResizeImage;cv::resize(OriginImage, ResizeImage, cv::Size(Width, Height));CHECK(ResizeImage.rows == Height) << "The heighs of Image is no equal to the input height.\n";CHECK(ResizeImage.cols == Width) << "The width of Image is no equal to the input width.\n";CHECK(ResizeImage.channels() == 3) << "The channel of Image is no equal to three.\n";    //4. should output the warning here// LOG(INFO) << "height " << ResizeImage.rows << " ";//LOG(INFO) << "weidth " << ResizeImage.cols << " ";//LOG(INFO) << "channels " << ResizeImage.channels() << "\n";// copy the image data to Pixelsfor (int HeightIndex = 0; HeightIndex < Height; ++HeightIndex){const uchar* ptr = ResizeImage.ptr<uchar>(HeightIndex);int img_index = 0;for (int WidthIndex = 0; WidthIndex < Width; ++WidthIndex){for (int ChannelIndex = 0; ChannelIndex < ResizeImage.channels(); ++ChannelIndex){int datum_index = (ChannelIndex * Height + HeightIndex) * Width + WidthIndex;*(Pixels + datum_index) = static_cast<char>(ptr[img_index++]);}}}return true;
}int main(int argc, char** argv)
#ifndef GFLAGS_GFLAGS_H_namespace gflags = google;
#endifgflags::SetUsageMessage("Convert a set of color images to the leveldb\n""format used as input for Caffe.\n""Usage:\n""    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n");caffe::GlobalInit(&argc, &argv);// 输入参数不足时报错if (argc < 4){gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");return 1;}// 读取图像名字和标签std::ifstream infile(argv[2]);std::vector<std::pair<std::string, std::string> > lines;std::string filename;std::string pairname;int label;while (infile >> filename >> pairname){lines.push_back(std::make_pair(filename, pairname));}// 打乱图片顺序if (FLAGS_shuffle){// randomly shuffle dataLOG(INFO) << "Shuffling data";shuffle(lines.begin(), lines.end());}LOG(INFO) << "A total of " << lines.size() << " images.";// 设置图像的高度和宽度int resize_height = std::max<int>(0, FLAGS_resize_height);int resize_width = std::max<int>(0, FLAGS_resize_width);int channel = std::max<int>(1, FLAGS_channel);     //5. add channel info// 打开数据库// Open leveldbleveldb::DB* db;leveldb::Options options;options.create_if_missing = true;options.error_if_exists = true;leveldb::Status status = leveldb::DB::Open(options, argv[3], &db);CHECK(status.ok()) << "Failed to open leveldb " << argv[3]<< ". Is it already existing?";// 保存到leveldb// Storing to leveldbstd::string root_folder(argv[1]);//char* Pixels = new char[2 * resize_height * resize_width];char* Pixels = new char[2 * resize_height * resize_width * channel];    //6. add channelconst int kMaxKeyLength = 10;   //10char key[kMaxKeyLength];std::string value;caffe::Datum datum;//datum.set_channels(2);  // one channel for each image in the pairdatum.set_channels(2 * channel);                //7. 
3 channels for each image in the pairdatum.set_height(resize_height);datum.set_width(resize_width);//// int line_size = (int)(lines.size()/2);// std::cout<<"number of lines: "<<line_size<<endl;for (int LineIndex = 0; LineIndex < lines.size(); LineIndex++){//int PairIndex = LineIndex + line_size;// cout<<PairIndex<<endl;// int PairIndex = caffe::caffe_rng_rand() % lines.size();char* FirstImagePixel = Pixels;// cout<<root_folder + lines[LineIndex].first<<endl;ReadImageToMemory(root_folder + lines[LineIndex].first, resize_height, resize_width, FirstImagePixel);  //8. add channel here//char *SecondImagePixel = Pixels + resize_width * resize_height;char *SecondImagePixel = Pixels + resize_width * resize_height * channel;       //10. add channelReadImageToMemory(root_folder + lines[LineIndex].second, resize_height, resize_width, SecondImagePixel);  //9. add channel here// set image pair data// datum.set_data(Pixels, 2 * resize_height * resize_width);datum.set_data(Pixels, 2 * resize_height * resize_width * channel);     //11. correct// set label// for training, first 1000 pairs are true; for testing,first 1000 pairs are true// if (LineIndex<4000)   //train: 912,3000 true pairs, 81,1080 false pairs;//test: 35600 true pairs, 33500 false pairsif (LineIndex<9123000){datum.set_label(1);}else{datum.set_label(0);}// printf("first index: %d, second index: %d, labels: %d \n", lines[LineIndex].second, lines[PairIndex].second, datum.label());// serialize datum to stringdatum.SerializeToString(&value);int key_value = (int)(LineIndex);_snprintf(key, kMaxKeyLength, "%08d", key_value);string keystr(key);cout << "label: " << datum.label() << ' ' << "key index: " << keystr << endl;//sprintf_s(key, kMaxKeyLength, "%08d", LineIndex);     db->Put(leveldb::WriteOptions(), std::string(key), value);}delete db;delete[] Pixels;return 0;







// Excerpt: how the target image size is obtained in main():
//     int resize_height = std::max<int>(0, FLAGS_resize_height);
//     int resize_width = std::max<int>(0, FLAGS_resize_width);
// The images are converted to this resize size. Following FLAGS_resize_width
// and FLAGS_resize_height to their definitions shows that they come from the
// two macro definitions below -- and this is the pitfall.
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
// Both target sizes default to 0, which is why the resize step reports an
// error unless --resize_width and --resize_height are given explicitly.


开始训练以后,我的caffe会报一个错误:第一个卷积层的卷积核参数共享提示维度不匹配,一个是20*1*5*5,共享层是20*5*5*5。没办法只能先把这个参数共享关了,当时不知道具体原因是什么,参数共享的这两个卷积层明明是一样的。如果有知道原因的同学还想请教一下,在此先谢过了。


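解决方法:把Slice层中的slice_point: 1去掉,使数据按通道平均分配,就没有维度不匹配的问题了。原因是现在每个Datum有2×3=6个通道,slice_point: 1会把它切成1个通道和5个通道两部分,所以两个卷积层的权重分别是20*1*5*5和20*5*5*5;去掉slice_point(或者改成slice_point: 3)之后,两个分支各得到3个通道,权重形状一致,参数共享才能成立。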


