struck（结构化SVM用于视觉跟踪）--源代码详解--main.cpp

struck 利用结构化SVM来实现视觉跟踪，在深度学习流行起来之前，struck是视觉跟踪领域效果最好的方法。深度学习流行之后，利用泛化的卷积特征能够得到很好的效果。struck的优点在于，它可以使用任意的特征来实现跟踪，因此它可以利用卷积神经网络提取的特征，然后结合结构化SVM来实现视觉跟踪，这样的效果说不定更好。

struck的源码是C++实现的，作者写的很好，思路清晰，代码结构清晰，而且与论文中的相符，没有那么多小trick，结果比较可靠。

下面从它的主函数开始，分析这份源码是如何实现的：

main.cpp

/*
 * Struck: Structured Output Tracking with Kernels
 *
 * Code to accompany the paper:
 *   Struck: Structured Output Tracking with Kernels
 *   Sam Hare, Amir Saffari, Philip H. S. Torr
 *   International Conference on Computer Vision (ICCV), 2011
 *
 * Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
 *
 * This file is part of Struck.
 *
 * Struck is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Struck is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Struck.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "Tracker.h"
#include "Config.h"

#include <iostream>
#include <fstream>
#include <climits>   // INT_MAX
#include <cstdio>    // printf, snprintf, sscanf
#include <cstdlib>   // EXIT_SUCCESS, EXIT_FAILURE, srand
#include <string>

#include <opencv/cv.h>
#include <opencv/highgui.h>

using namespace std;
using namespace cv;

// Size in pixels of the initial bounding box used in live camera mode.
static const int kLiveBoxWidth = 80;
static const int kLiveBoxHeight = 80;

// Draws the outline of rRect on rMat in colour rColour. The float rectangle
// is converted to an IntRect first, then forwarded to the OpenCV overload
// that takes two corner points.
void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour)
{
    IntRect r(rRect);
    rectangle(rMat, Point(r.XMin(), r.YMin()), Point(r.XMax(), r.YMax()), rColour);
}

// Entry point of the tracker demo.
//
// Reads ../docs/config.txt, then tracks either a live camera stream (when no
// sequence name is configured) or a stored image sequence. An optional first
// command-line argument overrides the sequence name from the config file.
// When a results path is configured, one bounding box per tracked frame is
// written to a text file, expressed in original image coordinates.
//
// Returns EXIT_SUCCESS when the loop finishes or the user quits, and
// EXIT_FAILURE on any configuration or I/O error.
int main(int argc, char* argv[])
{
#ifndef WIN32
    // The executable's base name becomes part of the results file name below.
    // Use the text after the LAST '/': the previous find_first_of() version
    // threw std::out_of_range when argv[0] had no '/', and otherwise kept the
    // leading '/' and directories, which corrupted the results path.
    // find_last_of() returns npos when there is no '/', and npos + 1 == 0, so
    // the whole string is kept in that case.
    const string exePath = (argc > 0 && argv[0] != NULL) ? argv[0] : "";
    const string programName = exePath.substr(exePath.find_last_of('/') + 1);
    cout << "programName: " << programName << endl;
#endif

    // read config file
    string configPath = "../docs/config.txt";
    Config conf(configPath);  // project class constructed from the config file path
    cout << conf << endl;     // echo the parsed configuration

    if (conf.features.size() == 0)
    {
        cout << "error: no features specified in config" << endl;
        return EXIT_FAILURE;
    }

    if (argc > 1)
    {
        conf.sequenceName = argv[1];
    }

    // Output stream for the tracking results; only opened when a results path is set.
    ofstream outFile;
    if (conf.resultsPath != "")
    {
#ifdef WIN32
        string resultsPath = conf.resultsPath + "/" + conf.sequenceName + "_result.txt";
#else
        string resultsPath = conf.resultsPath + "/" + conf.sequenceName + "_" + programName + "Result.txt";
#endif
        outFile.open(resultsPath, ios::out);
        if (!outFile)
        {
            // Report the file that actually failed to open, not just its directory.
            cout << "error: could not open results file: " << resultsPath << endl;
            return EXIT_FAILURE;
        }
    }

    // if no sequence specified then use the camera
    bool useCamera = (conf.sequenceName == "");

    VideoCapture cap;

    int startFrame = -1;
    int endFrame = -1;
    FloatRect initBB;
    string imgFormat;
    // Ratio between the configured working frame size and the source image size.
    float scaleW = 1.f;
    float scaleH = 1.f;

    if (useCamera)
    {
        if (!cap.open(0))
        {
            cout << "error: could not start camera capture" << endl;
            return EXIT_FAILURE;
        }
        startFrame = 0;
        endFrame = INT_MAX;

        // Grab one frame to learn the camera resolution.
        Mat tmp;
        cap >> tmp;
        if (tmp.empty())
        {
            // Without this check the scale factors below would divide by zero.
            cout << "error: could not read frame from camera" << endl;
            return EXIT_FAILURE;
        }
        scaleW = (float)conf.frameWidth/tmp.cols;
        scaleH = (float)conf.frameHeight/tmp.rows;

        // Box of kLiveBoxWidth x kLiveBoxHeight centred in the working frame,
        // e.g. top-left corner (120, 80) for a 320x240 frame.
        initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight);
        cout << "press 'i' to initialise tracker" << endl;
    }
    else
    {
        // parse frames file
        string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+"frames.txt";
        ifstream framesFile(framesFilePath.c_str(), ios::in);
        if (!framesFile)
        {
            cout << "error: could not open sequence frames file: " << framesFilePath << endl;
            return EXIT_FAILURE;
        }
        string framesLine;
        getline(framesFile, framesLine);
        printf("%s", framesLine.c_str());
        // Expected format: "<startFrame>,<endFrame>"; the -1 sentinels detect a failed parse.
        sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame);
        if (framesFile.fail() || startFrame == -1 || endFrame == -1)
        {
            cout << "error: could not parse sequence frames file" << endl;
            return EXIT_FAILURE;
        }

        // Image naming pattern (changed by the blog author, "qyy").
        imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/img/%04d.jpg";

        // read first frame to get size
        char imgPath[256];
        // snprintf cannot overrun imgPath; an over-long path is truncated and
        // then rejected by the empty-image check below.
        snprintf(imgPath, sizeof(imgPath), imgFormat.c_str(), startFrame);
        Mat tmp = cv::imread(imgPath, 0);
        if (tmp.empty())
        {
            // Without this check the scale factors below would divide by zero.
            cout << "error: could not read frame: " << imgPath << endl;
            return EXIT_FAILURE;
        }
        scaleW = (float)conf.frameWidth/tmp.cols;
        scaleH = (float)conf.frameHeight/tmp.rows;

        // read init box from ground truth file (file name changed by the blog author, "qyy")
        string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+"groundtruth_rect.txt";
        ifstream gtFile(gtFilePath.c_str(), ios::in);
        if (!gtFile)
        {
            cout << "error: could not open sequence gt file: " << gtFilePath << endl;
            return EXIT_FAILURE;
        }
        string gtLine;
        getline(gtFile, gtLine);
        float xmin = -1.f;
        float ymin = -1.f;
        float width = -1.f;
        float height = -1.f;
        // Expected format: "xmin,ymin,width,height"; negative sentinels detect a failed parse.
        sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height);
        if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f)
        {
            cout << "error: could not parse sequence gt file" << endl;
            return EXIT_FAILURE;
        }
        // Map the ground-truth box into the working frame size.
        initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH);
    }

    Tracker tracker(conf);

    // In quiet mode no window is created and nothing is displayed.
    if (!conf.quietMode)
    {
        namedWindow("result");
    }

    Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3);
    bool paused = false;
    bool doInitialise = false;
    srand(conf.seed);

    for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd)
    {
        cout << "frame num is: " << frameInd << endl;  // debug print added by the blog author
        Mat frame;
        if (useCamera)
        {
            Mat frameOrig;
            cap >> frameOrig;
            if (frameOrig.empty())
            {
                // resize() cannot handle an empty image; stop cleanly instead.
                cout << "error: could not read frame from camera" << endl;
                return EXIT_FAILURE;
            }
            resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
            // Mirror the camera image horizontally (flip code 1).
            // NOTE(review): presumably so the preview behaves like a mirror - confirm.
            flip(frame, frame, 1);
            frame.copyTo(result);
            if (doInitialise)
            {
                // 'i' toggles: reset a running tracker, otherwise start it on initBB.
                if (tracker.IsInitialised())
                {
                    tracker.Reset();
                }
                else
                {
                    tracker.Initialise(frame, initBB);
                }
                doInitialise = false;
            }
            else if (!tracker.IsInitialised())
            {
                // Not tracking yet: show the initial box in white.
                rectangle(result, initBB, CV_RGB(255, 255, 255));
            }
        }
        else
        {
            char imgPath[256];
            snprintf(imgPath, sizeof(imgPath), imgFormat.c_str(), frameInd);
            // Flag 0 loads the image as single-channel greyscale.
            Mat frameOrig = cv::imread(imgPath, 0);
            cout << "frameOrig.channels: " << frameOrig.channels() << endl;  // debug print added by the blog author
            if (frameOrig.empty())
            {
                cout << "error: could not read frame: " << imgPath << endl;
                return EXIT_FAILURE;
            }
            resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
            // The display image is 3-channel so coloured boxes can be drawn on it.
            cvtColor(frame, result, CV_GRAY2RGB);

            // Initialise the tracker on the first frame of the sequence.
            if (frameInd == startFrame)
            {
                tracker.Initialise(frame, initBB);
            }
        }

        if (tracker.IsInitialised())
        {
            tracker.Track(frame);

            if (!conf.quietMode && conf.debugMode)
            {
                tracker.Debug();
            }

            // Current tracking result in green.
            rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0));

            // Only write when a results file was actually opened: a
            // default-constructed ofstream tests true until its first failed write.
            if (outFile.is_open() && outFile)
            {
                // Divide by the scale factors to report the box in original image coordinates.
                const FloatRect& bb = tracker.GetBB();
                outFile << bb.XMin() / scaleW << "," << bb.YMin() / scaleH << "," << bb.Width() / scaleW << "," << bb.Height() / scaleH << endl;
                cout << "cout to file: " << bb.XMin() / scaleW << "," << bb.YMin() / scaleH << "," << bb.Width() / scaleW << "," << bb.Height() / scaleH << endl;
            }
        }

        if (!conf.quietMode)
        {
            imshow("result", result);
            // While paused, block until a key is pressed; otherwise poll for 1 ms.
            int key = waitKey(paused ? 0 : 1);
            if (key != -1)
            {
                if (key == 27 || key == 113) // esc q
                {
                    break;
                }
                else if (key == 112) // p
                {
                    paused = !paused;
                }
                else if (key == 105 && useCamera) // i
                {
                    doInitialise = true;
                    cout << "initialised !" << endl;  // debug print added by the blog author
                }
            }
            if (conf.debugMode && frameInd == endFrame)
            {
                cout << "\n\nend of sequence, press any key to exit" << endl;
                //waitKey();
            }
        }
    }

    if (outFile.is_open())
    {
        outFile.close();
    }

    return EXIT_SUCCESS;
}

所以，后面我主要关注 Tracker 这个类做了什么。我们看到 main.cpp 中调用了 tracker.Initialise、tracker.Track、tracker.Debug 这几个成员函数，所以这几个函数是作者算法实现的关键。

struck（结构化SVM用于视觉跟踪）--源代码详解--main.cpp相关推荐

ICCV2021 | 用于视觉跟踪的学习时空型transformer
前言本文介绍了一个端到端的用于视觉跟踪的transformer模型,它能够捕获视频序列中空间和时间信息的全局特征依赖关系.在五个具有挑战性的短期和长期基准上实现了SOTA性能,具有实时性,比Sia ...
Learning a Deep Compact Image Representation for Visual Tracking 学习用于视觉跟踪的深度紧凑图像表示
原文链接摘要在本文中,我们研究了跟踪可能非常复杂背景的视频中运动物体轨迹的挑战性问题.与大多数仅在线学习跟踪对象外观的现有跟踪器相比,我们采用不同的方法,受深度学习架构的最新进展的启发,更加强调( ...
卡尔曼滤波代码JAVA_卡尔曼滤波视觉跟踪源代码及效果视频
[实例简介] 卡尔曼滤波视觉跟踪源代码及效果视频,直接运行即可,调试完全可用,matlab仿真源码,程序简单易懂,非常适合新手学习 [实例截图] [核心代码] 29b664ed-3607-4fdf-a ...
干货 | OpenCV中KLT光流跟踪原理详解与代码演示
点击上方"小白学视觉",选择加"星标"或"置顶" 重磅干货,第一时间送达本文转自:opencv学堂稀疏光流跟踪(KLT)详解在视频移动 ...
java的markword_【转帖】Java工具结构与锁实现原理及MarkWord详解
Java工具结构与锁实现原理及MarkWord详解 https://www.pianshen.com/article/2382167638/ 我们都知道,Java工具存储在堆(Heap)内存.那么一个 ...
emule中节点加入Kad网络过程（源代码详解）【对原文部分改进】
from: http://blog.csdn.net/chenbuaa/article/details/2301656 emule中节点加入Kad网络过程(源代码详解) 程序启动: EmuleDlg. ...
视觉定位系统怎么实现定位及引导贴合的应用？视觉定位系统案例详解
视觉定位系统采用先进的图像视觉检测技术,实现对高速运动的工业产品进行实时全面的视觉定位分析.机器视觉系统可以起到人类视觉的作用,利用自动化科技来替代人眼,使质量进一步升级,不仅可以提高工作效率,而且减 ...
线程池源代码详解，参数详解
线程池源代码详解,参数详解 ThreadPoolExecutor 构造函数源代码 public ThreadPoolExecutor(int corePoolSize, int maximumPool ...
大白话解析Apriori算法python实现（含源代码详解）
大白话解析Apriori算法python实现(含源代码详解) 一.专业名词解释二.算法思路三.python代码实现四.Aprioir的优点.缺点及改进方法本文为博主原创文章,转载请注明出处,并 ...
Py之seaborn：数据可视化seaborn库(三)的矩阵图可视化之jointplot/JointGrid/pairplot/PairGrid/FacetGrid密度图等的函数源代码详解之最强攻略
Py之seaborn:数据可视化seaborn库(三)的矩阵图可视化之jointplot/JointGrid/pairplot/PairGrid/FacetGrid折线图/柱状图+散点图/矩形密度图的 ...

struck（结构化SVM用于视觉跟踪）--源代码详解--main.cpp

struck（结构化SVM用于视觉跟踪）--源代码详解--main.cpp相关推荐

最新文章

热门文章