从 https://github.com/tesseract-ocr/tesseract 下载最新源码(commit id: 86acff5,2016.06.07)。源码里面有个vs2010目录,用vs2013打开其中的tesseract.sln。Tesseract依赖图像库Leptonica,Leptonica的编译过程可以参考 http://blog.csdn.net/fengbingchun/article/details/44275233 ,它的源码在 https://github.com/fengbingchun/Liblept_Test 。将Leptonica静态库及头文件加入到libtesseract304工程属性配置中。然后修改tesseract中的equationdetect.cpp文件,将

static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".","〈", "〉", "《", "》", "」", "「", ""};

修改为

static const STRING kCharsToEx[] = { "'", "`", "\"", "\\", ",", ".","<", ">", "<<", ">>", "" };

分别在LIB_Debug和LIB_Release下编译libtesseract304工程,便能生成tesseract静态库。

仿照libtesseract304工程,编译tesseract工程,将静态库zlib、tiff、lept和tesseract加入到工程属性中即可。

新建Tesseract-OCR_Test控制台工程,将相应头文件和静态库加入到此工程中,测试代码来自于src/api/tesseractmain.cpp,如下:

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "renderer.h"
#include "strngs.h"
#include "tprintf.h"
#include "openclwrapper.h"
#include "osdetect.h"

// Prints the Tesseract version followed by the Leptonica and image-library
// version strings. Each string returned by Leptonica is released with
// lept_free() after it has been printed.
void PrintVersionInfo() {
  char *versionStrP;

  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());

  versionStrP = getLeptonicaVersion();
  printf(" %s\n", versionStrP);
  lept_free(versionStrP);

  versionStrP = getImagelibVersions();
  printf("  %s\n", versionStrP);
  lept_free(versionStrP);
}

// Prints the short command-line synopsis; `program` is substituted as the
// executable name in each usage line.
void PrintUsage(const char* program) {
  printf(
      "Usage:\n"
      "  %s --help | --help-psm | --version\n"
      "  %s --list-langs [--tessdata-dir PATH]\n"
      "  %s --print-parameters [options...] [configfile...]\n"
      "  %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
      program, program, program, program);
}

// Prints the table of page segmentation modes accepted by -psm.
void PrintHelpForPSM() {
  const char* msg =
      "Page segmentation modes:\n"
      "  0    Orientation and script detection (OSD) only.\n"
      "  1    Automatic page segmentation with OSD.\n"
      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
      "  4    Assume a single column of text of variable sizes.\n"
      "  5    Assume a single uniform block of vertically aligned text.\n"
      "  6    Assume a single uniform block of text.\n"
      "  7    Treat the image as a single text line.\n"
      "  8    Treat the image as a single word.\n"
      "  9    Treat the image as a single word in a circle.\n"
      " 10    Treat the image as a single character.\n";

  printf("%s", msg);
}

// Prints the full help text: usage synopsis, OCR options, the page
// segmentation mode table and the stand-alone ("single") options.
void PrintHelpMessage(const char* program) {
  PrintUsage(program);

  const char* ocr_options =
      "OCR options:\n"
      "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
      "  --user-words PATH     Specify the location of user words file.\n"
      "  --user-patterns PATH  Specify the location of user patterns file.\n"
      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
      "  -c VAR=VALUE          Set value for config variables.\n"
      "                        Multiple -c arguments are allowed.\n"
      "  -psm NUM              Specify page segmentation mode.\n"
      "NOTE: These options must occur before any configfile.\n";

  printf("\n%s\n", ocr_options);
  PrintHelpForPSM();

  const char *single_options =
      "Single options:\n"
      "  -h, --help            Show this help message.\n"
      "  --help-psm            Show page segmentation modes.\n"
      "  -v, --version         Show version information.\n"
      "  --list-langs          List available languages for tesseract engine.\n"
      "  --print-parameters    Print tesseract parameters to stdout.\n";

  printf("\n%s", single_options);
}

// Applies every "-c VAR=VALUE" pair found in argv to `api`.
// The name and the value are each copied into fixed-size buffers and
// truncated if too long. A "-c" argument without '=' is fatal (exit code 1);
// a variable that the API rejects is only reported on stderr.
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) {
  char opt1[256], opt2[255];
  for (int i = 0; i < argc; i++) {
    if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
      strncpy(opt1, argv[i + 1], 255);
      opt1[255] = '\0';
      char *p = strchr(opt1, '=');
      if (!p) {
        fprintf(stderr, "Missing = in configvar assignment\n");
        exit(1);
      }
      // Terminate the variable name at '='; the value is re-read from argv.
      *p = 0;
      strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
      opt2[254] = 0;
      ++i;  // skip the VAR=VALUE argument that was just consumed

      if (!api->SetVariable(opt1, opt2)) {
        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
      }
    }
  }
}

// Prints the languages reported by `api`, one per line, preceded by a count,
// and then calls api->End().
void PrintLangsList(tesseract::TessBaseAPI* api) {
  GenericVector<STRING> languages;
  api->GetAvailableLanguagesAsVector(&languages);
  printf("List of available languages (%d):\n", languages.size());
  for (int index = 0; index < languages.size(); ++index) {
    STRING& string = languages[index];
    printf("%s\n", string.string());
  }
  api->End();
}

// Prints the one-line engine banner through tprintf.
void PrintBanner() {
  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
          tesseract::TessBaseAPI::Version());
}

// Installs `pagesegmode` on `api`, but only when the API currently reports
// PSM_SINGLE_BLOCK; any other current mode is left untouched.
void FixPageSegMode(tesseract::TessBaseAPI* api,
                    tesseract::PageSegMode pagesegmode) {
  if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
    api->SetPageSegMode(pagesegmode);
}

// Parses the command line into the output parameters.
//
// Help/version requests (and a missing output base when OCR is requested)
// print the relevant text and terminate the process. Options that take a
// value consume the following argument. The first two non-option arguments
// become *image and *outputbase. On return *arg_i is the index of the first
// argument that was not consumed here.
//
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
void ParseArgs(const int argc, char** argv,
               const char** lang,
               const char** image,
               const char** outputbase,
               const char** datapath,
               bool* list_langs,
               bool* print_parameters,
               GenericVector<STRING>* vars_vec,
               GenericVector<STRING>* vars_values,
               int* arg_i,
               tesseract::PageSegMode* pagesegmode) {
  if (argc == 1) {
    PrintHelpMessage(argv[0]);
    exit(0);
  }

  if (argc == 2) {
    if ((strcmp(argv[1], "-h") == 0) ||
        (strcmp(argv[1], "--help") == 0)) {
      PrintHelpMessage(argv[0]);
      exit(0);
    }
    if ((strcmp(argv[1], "--help-psm") == 0)) {
      PrintHelpForPSM();
      exit(0);
    }
    if ((strcmp(argv[1], "-v") == 0) ||
        (strcmp(argv[1], "--version") == 0)) {
      PrintVersionInfo();
      exit(0);
    }
  }

  bool noocr = false;
  int i = 1;
  // Keep scanning until the output base has been seen and the next argument
  // no longer starts with '-'.
  while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
    if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
      *lang = argv[i + 1];
      ++i;
    }
    else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
      *datapath = argv[i + 1];
      ++i;
    }
    else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
      vars_vec->push_back("user_words_file");
      vars_values->push_back(argv[i + 1]);
      ++i;
    }
    else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
      vars_vec->push_back("user_patterns_file");
      vars_values->push_back(argv[i + 1]);
      ++i;
    }
    else if (strcmp(argv[i], "--list-langs") == 0) {
      noocr = true;
      *list_langs = true;
    }
    else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
      ++i;
    }
    else if (strcmp(argv[i], "--print-parameters") == 0) {
      noocr = true;
      *print_parameters = true;
    }
    else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
      // handled properly after api init
      ++i;
    }
    else if (*image == NULL) {
      *image = argv[i];
    }
    else if (*outputbase == NULL) {
      *outputbase = argv[i];
    }
    ++i;
  }

  *arg_i = i;

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    *list_langs = true;
    noocr = true;
  }

  if (*outputbase == NULL && noocr == false) {
    PrintHelpMessage(argv[0]);
    exit(1);
  }
}

// Fills `renderers` with one renderer per enabled output format.
//
// In PSM_OSD_ONLY mode only the OSD renderer is created. Otherwise each
// tessedit_create_* / tessedit_write_unlv variable is queried and the
// matching renderer appended; a text renderer is added when requested or
// when nothing else was selected. Finally all renderers after the first are
// chained onto the first one.
void PreloadRenderers(tesseract::TessBaseAPI* api,
                      tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
                      tesseract::PageSegMode pagesegmode,
                      const char* outputbase) {
  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
    renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
  }
  else {
    // Initialised so that an unsuccessful GetBoolVariable() lookup cannot
    // leave an indeterminate value to be tested below.
    bool b = false;

    api->GetBoolVariable("tessedit_create_hocr", &b);
    if (b) {
      bool font_info = false;
      api->GetBoolVariable("hocr_font_info", &font_info);
      renderers->push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
    }

    api->GetBoolVariable("tessedit_create_tsv", &b);
    if (b) {
      bool font_info = false;
      api->GetBoolVariable("hocr_font_info", &font_info);
      renderers->push_back(new tesseract::TessTsvRenderer(outputbase, font_info));
    }

    api->GetBoolVariable("tessedit_create_pdf", &b);
    if (b) {
      renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
                                                          api->GetDatapath()));
    }

    api->GetBoolVariable("tessedit_write_unlv", &b);
    if (b) {
      renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
    }

    api->GetBoolVariable("tessedit_create_boxfile", &b);
    if (b) {
      renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
    }

    api->GetBoolVariable("tessedit_create_txt", &b);
    if (b || renderers->empty()) {
      renderers->push_back(new tesseract::TessTextRenderer(outputbase));
    }
  }

  if (!renderers->empty()) {
    // Since the PointerVector auto-deletes, null-out the renderers that are
    // added to the root, and leave the root in the vector.
    for (int r = 1; r < renderers->size(); ++r) {
      (*renderers)[0]->insert((*renderers)[r]);
      (*renderers)[r] = NULL;
    }
  }
}

// Command-line entry point: parses the arguments, initialises the Tesseract
// API, handles the informational modes (--list-langs, --print-parameters,
// layout analysis only) and otherwise runs OCR on the input image through
// the selected renderers. Returns 0 on success; failures exit with a
// non-zero status.
int main(int argc, char **argv)
{
  const char* lang = "eng";
  const char* image = NULL;
  const char* outputbase = NULL;
  const char* datapath = NULL;
  bool list_langs = false;
  bool print_parameters = false;
  GenericVector<STRING> vars_vec, vars_values;
  int arg_i = 1;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;

  ParseArgs(argc, argv,
            &lang, &image, &outputbase, &datapath,
            &list_langs, &print_parameters,
            &vars_vec, &vars_values, &arg_i, &pagesegmode);

  // The banner is printed only when the output base is neither "-" nor
  // "stdout".
  bool banner = false;
  if (outputbase != NULL && strcmp(outputbase, "-") &&
      strcmp(outputbase, "stdout")) {
    banner = true;
  }

  PERF_COUNT_START("Tesseract:main")
  tesseract::TessBaseAPI api;

  api.SetOutputName(outputbase);
  // Arguments from arg_i onward are handed to Init() as config files.
  int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
                             &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false);
  if (init_failed) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
  }

  SetVariablesFromCLArgs(&api, argc, argv);

  if (list_langs) {
    PrintLangsList(&api);
    exit(0);
  }

  if (print_parameters) {
    FILE* fout = stdout;
    fprintf(stdout, "Tesseract parameters:\n");
    api.PrintVariables(fout);
    api.End();
    exit(0);
  }

  FixPageSegMode(&api, pagesegmode);

  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
    // Layout analysis only: report orientation information and exit.
    int ret_val = 0;

    Pix* pixs = pixRead(image);
    if (!pixs) {
      fprintf(stderr, "Cannot open input file: %s\n", image);
      exit(2);
    }
    api.SetImage(pixs);

    tesseract::Orientation orientation;
    tesseract::WritingDirection direction;
    tesseract::TextlineOrder order;
    float deskew_angle;

    tesseract::PageIterator* it = api.AnalyseLayout();
    if (it) {
      it->Orientation(&orientation, &direction, &order, &deskew_angle);
      tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
              "Deskew angle: %.4f\n",
              orientation, direction, order, deskew_angle);
    }
    else {
      ret_val = 1;
    }

    delete it;

    pixDestroy(&pixs);
    exit(ret_val);
  }

  // set in_training_mode to true when using one of these configs:
  // ambigs.train, box.train, box.train.stderr, linebox, rebox
  bool b = false;
  bool in_training_mode =
      (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
      (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
      (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);

  tesseract::PointerVector<tesseract::TessResultRenderer> renderers;

  if (in_training_mode) {
    // Training configs run with a NULL renderer.
    renderers.push_back(NULL);
  }
  else {
    PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
  }

  if (!renderers.empty()) {
    if (banner) PrintBanner();
    bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
    if (!succeed) {
      fprintf(stderr, "Error during processing.\n");
      exit(1);
    }
  }

  PERF_COUNT_END
  return 0;                      // Normal exit
}

GitHub: https://github.com/fengbingchun/Tesseract-OCR_Test

Tesseract-OCR 3.04在Windows7 vs2013上编译过程相关推荐

  1. Intel TBB简介及在Windows7 VS2013上源码的编译过程

    Intel TBB(Intel Threading Building Blocks)是Intel线程构建块开源库,它的License是Apache 2.0. Intel TBB是一种用于并行编程的基于 ...

  2. OpenBLAS简介及在Windows7 VS2013上源码的编译过程

    OpenBLAS(Open Basic Linear Algebra Subprograms)是开源的基本线性代数子程序库,是一个优化的高性能多核BLAS库,主要包括矩阵与矩阵.矩阵与向量.向量与向量 ...

  3. boost1.55.0在vs2013上编译序列化库失败的解决方法

    2019独角兽企业重金招聘Python工程师标准>>> 之前一直没有使用boost序列化的库,所以一直没法发现boost.155.0序列化库在vs2013下面编译通不过. 今天打算用 ...

  4. 基于chyh1990/caffe-compact在windows vs2013上编译caffe步骤

    1.      从https://github.com/chyh1990/caffe-compact下载caffe-compact代码: 2.      通过CMake(cmake-gui)生成vs2 ...

  5. Ubuntu 10.04 内核2.6.34编译过程记录

    1.安装编译环境 $sudo  apt-get install build-essential kernel-package   libncurses5-dev 2.下载内核源代码 访问 http:/ ...

  6. Android-x86-6.0定制之路 - 在Ubuntu 16.04.5上编译

    前言 由于下载的开源系统没有系统签名,所以不可能针对系统去做什么定制. 首先,要去下载 Android-x86-6.0 的系统源码,再去尝试编译系统,如果编译成功并且能够正常运行的话,才能研究去定制系 ...

  7. 在Windows7/10上通过VS2013编译FFmpeg 4.1.3源码操作步骤

    多年前在https://blog.csdn.net/fengbingchun/article/details/40951403 中对FFmpeg在windows下的编译过程做过说明,那时FFmpeg版 ...

  8. Ubuntu12.04上编译PlateGatewayQt

    Ubuntu12.04上编译PlateGatewayQt 2013-03-09 00:21:56|  分类: Computer Vision |  标签:plategatwayqt  opencv   ...

  9. Tesseract OCR简介(三)--安装及参数使用

    参考:AI-Tesseract-OCR简介_花熊的博客-CSDN博客_tesseract坐标 一.windows 1.1 Tesseract安装 适用于Tesseract 3.05和Tesseract ...

最新文章

  1. 巴巴腾机器人怎么开机_【巴巴腾智能机器人使用】_摘要频道_什么值得买
  2. Hystrix和ribbon的超时时长准确配置的理论依据
  3. 在.net3.5中使用ListView控件和DataPager控件笔记
  4. css伪类元素加在元素前,CSS伪类:before在元素之前 :after 在元素之后实例讲解
  5. 【渝粤教育】电大中专电商运营实操 (8)作业 题库
  6. 找不到图片素材,看这里
  7. ICPC North Central NA Contest 2017 B - Pokemon Go Go
  8. 1.Spring实现数据库的读写分离
  9. lunix mysql创建视图_Linux命令:MySQL系列之六--VIEW视图/mysql -e
  10. 计算机编程—必备基础知识点
  11. 找不到项目 该项不在计算机中,删除文件夹提示找不到该项目怎么删除?“找不到该项目”强删方法(图文)...
  12. 在树莓派上创建区块链节点
  13. 手机进程设置多少个最好_手机打开,开发者选项中的这4个设置,性能瞬间提升一倍,不卡顿...
  14. net::ERR_INTERNET_DISCONNECTED
  15. 作为程序员,到底是老板对你怎么了,让你竟然写下这么既奇葩无语又崩溃的代码注释,笑哭...……
  16. 安装MySQL——压缩包安装
  17. [Tool] 仿博客园插入代码的 WLW 插件
  18. android ogg转mp3,MP3提取转换器
  19. bootstrap使用及解析
  20. 无人驾驶技术的突破与挑战

热门文章

  1. 机器学习中的算法(4.3):SVM----针对线性不可分问题理解
  2. 基于语义分割的视频弹幕防挡实现(训练、测试、部署实现)
  3. 兰州大学C语言程序设计课程作业,【兰州大学|兰州大学C语言程序设计课程作业( 五 )】语言程序设计|课程|作业-傻大方...
  4. docker安装redis提示没有日记写入权限_对 Redis 在 Windows 下的利用方式思考
  5. 基于线段的激光雷达和单目联合曲面重建​
  6. CDN和Web Cache领域相关的经典书籍推荐
  7. linux进程间通信:popen函数通过管道与shell通信
  8. Alpha冲刺 - (5/10)
  9. 学术-数学:哥德巴赫猜想
  10. thinkphp5内置标签