OpenCV+Tesseract自动识别文字区域并识别文字

文字区域识别
文字区域处理
完整代码

如果图片中有非文字的其他图形，直接用tesseract进行识别的话，会把非文字的图形当成文字进行识别（往往识别出来的是乱七八糟的字符）。因此首先需要把文字区域识别出来，再对文字区域进行处理，最后进行文字识别。

文字区域识别

// Turns a grayscale image into a binary mask in which neighbouring text
// strokes are merged into solid blobs suitable for contour detection.
//
// gray: input image; expected to be single-channel 8-bit (Otsu thresholding
//       requires that format).
// Returns the mask after Sobel -> Otsu threshold -> dilate -> erode -> dilate.
// Side effect: writes binary.jpg, dilate1.jpg, erode1.jpg and dilate2.jpg so
// that every intermediate stage can be inspected.
Mat preprocess(Mat gray)
{
    // 1. Sobel operator, gradient along x (dx = 1, dy = 0, kernel size 3).
    Mat sobel;
    Sobel(gray, sobel, CV_8U, 1, 0, 3);

    // 2. Binarize; the threshold value is chosen by Otsu's method.
    Mat binary;
    threshold(sobel, binary, 0, 255, THRESH_OTSU + THRESH_BINARY);

    // 3. Kernels for the morphological steps.
    // element1 (30x9) is used for erosion, element2 (24x4) for dilation.
    // The kernel height controls how far dilation spreads towards the lines
    // above and below: e.g. 3 separates lines better than 4, but may also
    // cause missed detections.
    Mat element1 = getStructuringElement(MORPH_RECT, Size(30, 9));
    Mat element2 = getStructuringElement(MORPH_RECT, Size(24, 4));

    // 4. Dilate once so the outlines stand out.
    Mat dilate1;
    dilate(binary, dilate1, element2);

    // 5. Erode once to remove details such as table lines; what gets removed
    //    here are the vertical lines.
    Mat erode1;
    erode(dilate1, erode1, element1);

    // 6. Dilate again to make the remaining outlines more pronounced.
    Mat dilate2;
    dilate(erode1, dilate2, element2);

    // 7. Store the intermediate images.
    imwrite("binary.jpg", binary);
    imwrite("dilate1.jpg", dilate1);
    imwrite("erode1.jpg", erode1);
    imwrite("dilate2.jpg", dilate2);

    return dilate2;
}

// Finds candidate text regions in a binary mask produced by preprocess().
//
// img: binary mask.
// Returns the minimum-area rotated rectangle of every contour that is large
// enough (area >= 1000) and not markedly taller than wide.
vector<RotatedRect> findTextRegion(Mat img)
{
    vector<RotatedRect> rects;

    // 1. Find the contours.
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

    // 2. Filter the contours.
    for (const vector<Point>& contour : contours)
    {
        // Discard everything with an area below 1000.
        if (contourArea(contour) < 1000)
            continue;

        // Minimum-area rectangle; it may be rotated.
        const RotatedRect rect = minAreaRect(contour);

        // Width and height are taken from the upright bounding box, not from
        // the rotated rectangle itself.
        const Rect bounds = rect.boundingRect();

        // Drop tall, thin rectangles and keep the flat ones.
        if (bounds.height > bounds.width * 1.2)
            continue;

        rects.push_back(rect);
    }

    return rects;
}

// Detects text regions in a BGR image, outlines them in green, shows the
// result in a window named "img" and writes it to imgDrawRect.jpg.
// Blocks in waitKey(0) until a key is pressed.
void detect(Mat img)
{
    // 1. Convert to grayscale.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);

    // 2. Morphological preprocessing to obtain contours that can be boxed.
    Mat dilation = preprocess(gray);

    // 3. Find and filter the text regions.
    vector<RotatedRect> rects = findTextRegion(dilation);

    // 4. Draw the four edges of every region in green, 2 px thick.
    for (const RotatedRect& rect : rects)
    {
        Point2f P[4];
        rect.points(P);
        for (int j = 0; j < 4; j++)
        {
            line(img, P[j], P[(j + 1) % 4], Scalar(0, 255, 0), 2);
        }
    }

    // 5. Show and save the annotated image.
    imshow("img", img);
    imwrite("imgDrawRect.jpg", img);
    waitKey(0);
}

文字区域处理

用上面的方法识别出来的文字区域是文字区域的最小外接矩形，有可能有些文字的边边角角有些像素就被排除在外了，因此还需要把文字区域扩大一点。

cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片Mat dilation = preprocess(originalPicture );vector<RotatedRect> rects = findTextRegion(dilation);int xmin = 0, xmax = 0, ymin = 0, ymax = 0;int count = 0;for each (RotatedRect rect in rects){count++;Point2f P[4];rect.points(P);xmin = P[1].x;ymin = P[1].y;xmax = P[1].x;ymax = P[1].y;for (int j = 0; j <= 3; j++){if (P[j].x < xmin){xmin = P[j].x;}if (P[j].y < ymin){ymin = P[j].y;}if (P[j].x > xmax){xmax = P[j].x;}if (P[j].y > ymax){ymax = P[j].y;}}Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少，还取决于文字区域旁边有多少空白的地方可以扩，尽量往外扩。}

文字区域外扩之后，有时仍然不能直接用tesseract进行识别，识别结果可能是乱码（可能是因为此时截下来的图的边沿到实际文字的距离太近）。此时还需要把刚才外扩之后的区域再进行放大。

     Mat temppicture = originalPicture (tempRect);double scale = 2;//文字区域截图放大，倍数为2。具体的放大位数还需要调试，这取决于未放大前的文字区域图片的大小和分辨率等。Size dsize = Size(temppicture.cols * scale, temppicture.rows * scale);Mat img2 = Mat(dsize, CV_32S);resize(temppicture, img2, dsize);tessChi_sim->SetImage((uchar*)img2.data, img2.cols, img2.rows, 1, img2.cols);//tessChi_sim->SetSourceResolution(1000);tessChi_sim->SetVariable("textord_really_old_xheight", "1");char* out = tessChi_sim->GetUTF8Text();std::string temp(out);std::string stdstr;if (out != NULL){const char* textout = temp.c_str();printf(textout);text = text + gcnew System::String(textout, 0, strlen(textout), System::Text::UTF8Encoding::UTF8);}

完整代码

#include <cstdio>
#include <cstring>
#include <memory>
#include <vector>

#include <opencv2\core\core.hpp>
#include <opencv2\highgui\highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2\opencv.hpp>
#include <opencv2/imgcodecs.hpp>
#include <tesseract/baseapi.h>

using namespace std;
using namespace cv;
using namespace tesseract;
// Turns a grayscale image into a binary mask in which neighbouring text
// strokes are merged into solid blobs suitable for contour detection.
//
// gray: input image; expected to be single-channel 8-bit (Otsu thresholding
//       requires that format).
// Returns the mask after Sobel -> Otsu threshold -> dilate -> erode -> dilate.
// Side effect: writes binary.jpg, dilate1.jpg, erode1.jpg and dilate2.jpg so
// that every intermediate stage can be inspected.
Mat preprocess(Mat gray)
{
    // 1. Sobel operator, gradient along x (dx = 1, dy = 0, kernel size 3).
    Mat sobel;
    Sobel(gray, sobel, CV_8U, 1, 0, 3);

    // 2. Binarize; the threshold value is chosen by Otsu's method.
    Mat binary;
    threshold(sobel, binary, 0, 255, THRESH_OTSU + THRESH_BINARY);

    // 3. Kernels for the morphological steps.
    // element1 (30x9) is used for erosion, element2 (24x4) for dilation.
    // The kernel height controls how far dilation spreads towards the lines
    // above and below: e.g. 3 separates lines better than 4, but may also
    // cause missed detections.
    Mat element1 = getStructuringElement(MORPH_RECT, Size(30, 9));
    Mat element2 = getStructuringElement(MORPH_RECT, Size(24, 4));

    // 4. Dilate once so the outlines stand out.
    Mat dilate1;
    dilate(binary, dilate1, element2);

    // 5. Erode once to remove details such as table lines; what gets removed
    //    here are the vertical lines.
    Mat erode1;
    erode(dilate1, erode1, element1);

    // 6. Dilate again to make the remaining outlines more pronounced.
    Mat dilate2;
    dilate(erode1, dilate2, element2);

    // 7. Store the intermediate images.
    imwrite("binary.jpg", binary);
    imwrite("dilate1.jpg", dilate1);
    imwrite("erode1.jpg", erode1);
    imwrite("dilate2.jpg", dilate2);

    return dilate2;
}

// Finds candidate text regions in a binary mask produced by preprocess().
//
// img: binary mask.
// Returns the minimum-area rotated rectangle of every contour that is large
// enough (area >= 1000) and not markedly taller than wide.
vector<RotatedRect> findTextRegion(Mat img)
{
    vector<RotatedRect> rects;

    // 1. Find the contours.
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(img, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE, Point(0, 0));

    // 2. Filter the contours.
    for (const vector<Point>& contour : contours)
    {
        // Discard everything with an area below 1000.
        if (contourArea(contour) < 1000)
            continue;

        // Minimum-area rectangle; it may be rotated.
        const RotatedRect rect = minAreaRect(contour);

        // Width and height are taken from the upright bounding box, not from
        // the rotated rectangle itself.
        const Rect bounds = rect.boundingRect();

        // Drop tall, thin rectangles and keep the flat ones.
        if (bounds.height > bounds.width * 1.2)
            continue;

        rects.push_back(rect);
    }

    return rects;
}

// Detects text regions in a BGR image, outlines them in green, shows the
// result in a window named "img" and writes it to imgDrawRect.jpg.
// Blocks in waitKey(0) until a key is pressed.
void detect(Mat img)
{
    // 1. Convert to grayscale.
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);

    // 2. Morphological preprocessing to obtain contours that can be boxed.
    Mat dilation = preprocess(gray);

    // 3. Find and filter the text regions.
    vector<RotatedRect> rects = findTextRegion(dilation);

    // 4. Draw the four edges of every region in green, 2 px thick.
    for (const RotatedRect& rect : rects)
    {
        Point2f P[4];
        rect.points(P);
        for (int j = 0; j < 4; j++)
        {
            line(img, P[j], P[(j + 1) % 4], Scalar(0, 255, 0), 2);
        }
    }

    // 5. Show and save the annotated image.
    imshow("img", img);
    imwrite("imgDrawRect.jpg", img);
    waitKey(0);
}

void main()
{cv::Mat originalPicture = imread(picturePath, cv::IMREAD_GRAYSCALE);//读取一张图片Mat dilation = preprocess(originalPicture );vector<RotatedRect> rects = findTextRegion(dilation);int xmin = 0, xmax = 0, ymin = 0, ymax = 0;int count = 0;for each (RotatedRect rect in rects){count++;Point2f P[4];rect.points(P);xmin = P[1].x;ymin = P[1].y;xmax = P[1].x;ymax = P[1].y;for (int j = 0; j <= 3; j++){if (P[j].x < xmin){xmin = P[j].x;}if (P[j].y < ymin){ymin = P[j].y;}if (P[j].x > xmax){xmax = P[j].x;}if (P[j].y > ymax){ymax = P[j].y;}}Rect tempRect(xmin - 5, ymin - 5, xmax - xmin + 10, ymax - ymin + 10);//文字区域的最小外接矩形的4条边再外扩5个像素点。具体外扩多少，还取决于文字区域旁边有多少空白的地方可以扩，尽量往外扩。}Mat temppicture = originalPicture(tempRect);double scale = 2;//文字区域截图放大，倍数为2。具体的放大位数还需要调试，这取决于未放大前的文字区域图片的大小和分辨率等。Size dsize = Size(temppicture.cols * scale, temppicture.rows * scale);Mat img2 = Mat(dsize, CV_32S);resize(temppicture, img2, dsize);tessChi_sim->SetImage((uchar*)img2.data, img2.cols, img2.rows, 1, img2.cols);//tessChi_sim->SetSourceResolution(1000);tessChi_sim->SetVariable("textord_really_old_xheight", "1");char* out = tessChi_sim->GetUTF8Text();std::string temp(out);std::string stdstr;if (out != NULL){const char* textout = temp.c_str();printf(textout);text = text + gcnew System::String(textout, 0, strlen(textout), System::Text::UTF8Encoding::UTF8);}
}

OpenCV+Tesseract自动识别文字区域并识别文字相关推荐

我们怎样识别图片上的文字？图片识别文字软件有哪些？
在日常生活中,大家都会保留很多照片.比如老师上课的PPT.上班办公的文档.随手截取的网页图片等等.这些有时候是因为我们来不及记录,而拍照作备用的,后期还需要我们去手写抄录.其实这效率未免太慢了,如果可 ...
手机如何提取图片中的文字、拍照识别文字的操作
我们在看书的过程中,经常遇到一些优美的文字,但是苦于文字篇幅太长,打字输入到电脑又太累,仅仅拍照又不方便将来的查找和引用.有没有一种办法,用手机拍下图书的照片,然后直接转换成文字的呢?当然有,本文就是 ...
微信怎么识别文字？手机识别文字原理是什么？
微信是一款十分智能化的应用程序,其文字识别功能也是其中之一.微信的文字识别功能主要依赖于OCR技术,即光学字符识别技术. OCR的原理是什么? OCR技术是一种将图像中的字符.数字等信息转换成文本的技 ...
【项目实践】中英文文字检测与识别项目（CTPN+CRNN+CTC Loss原理讲解）
点击上方"小白学视觉",选择加"星标"或"置顶" 重磅干货,第一时间送达本文转自:opencv学堂 OCR--简介文字识别也是图像领域一 ...
利用语义分割（FCN）区分两种有文字和无文字区域
问题描述: 图书馆中的书大小不一,为了保证美观,章的位置应该尽量贴到图书的第一页的空白区域语义分割即是对图像中每一个像素点进行分类,确定每个点的类别(空白或者是文字区域),从而进行区域划分.图像分割 ...
OpenCV 文字检测与识别模块
OpenCV 文字检测与识别模块该模块在扩展模块中,需自行下载下载地址:https://github.com/opencv/opencv_contrib/tree/4.0.0 说明文档: 文字检测 ...
uiautomator2+ tesseract 智能识别文字实现手游辅助外挂,打怪刷装备快人一步
目录一.背景二.需求分解三.脚本开发实践 1.tesseract 安装及测试 2.python使用Tesseract库识别文字 3.构建定时任务,定时刷怪 4.最终效果一.背景先交代下背景, ...
opencv学习笔记五--文件扫描+OCR文字识别
opencv学习笔记五--文件扫描+OCR文字识别文件扫描定义函数边缘检测获取轮廓变换 OCR文字识别环境配置代码文件扫描 # 导入工具包 import numpy as np imp ...
opencv文字区域的提取(vs2019 c++)
原理我们是怎么做到检测到区域的呢? 首先,我们会注意到,文字区域和其他的图片背景很不一样.我们用膨胀处理图片,让文字变成一块块大区域,然后识别整块的轮廓,用矩形去框住这个轮廓. 这个程序分三个子函数 ...
R语言图片识别文字 PNG JPG图片转文字 OCR tesseract包
提示:适用于比较简单的结构(例如excel的截图),图片文字保证清晰 R-studio版本其实Python有很多接口百度OCR,B站有很多应用性很强的教学可以学习下 #安装-可以直接Packages ...

OpenCV+Tesseract自动识别文字区域并识别文字

OpenCV+Tesseract自动识别文字区域并识别文字

文字区域识别

文字区域处理

完整代码

OpenCV+Tesseract自动识别文字区域并识别文字相关推荐

最新文章

热门文章