我的east和ctpn速度差不多,east正确率高4%

http://xiaofengshi.com/2019/01/23/深度学习-TextDetection/

https://codeload.github.com/GlassyWing/text-detection-ocr/zip/master

1、传统算法

import cv2
import numpy as np  # original comment: "读取图片" (read the image) — refers to the lines below
# Demo image path. NOTE(review): the __main__ block further down reloads
# './asset/0015.jpg' into its own `image` variable, so this module-level
# `img` appears unused there — confirm before removing.
imagePath = 'asset/0015.jpg'
img = cv2.imread(imagePath)
def get_box(img):
    """Detect candidate text regions in a BGR image with classic morphology.

    Pipeline: grayscale -> Sobel vertical-edge map -> Otsu binarization ->
    dilate/erode/dilate (merges characters into text-line blobs) ->
    contour filtering by area and aspect ratio.

    Args:
        img: BGR image (numpy array) as returned by ``cv2.imread``.

    Returns:
        list of 4x2 integer arrays, each holding the corner points of a
        rotated bounding box in ``cv2.boxPoints`` order.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Sobel in x highlights vertical strokes, which text lines are rich in.
    sobel = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3)
    ret, binary = cv2.threshold(sobel, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)
    # Morphology: dilate to make outlines stand out, erode to drop fine
    # detail, dilate again so each text line becomes one solid blob.
    element1 = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 9))
    element2 = cv2.getStructuringElement(cv2.MORPH_RECT, (24, 6))
    dilation = cv2.dilate(binary, element2, iterations=1)
    erosion = cv2.erode(dilation, element1, iterations=1)
    dilation2 = cv2.dilate(erosion, element2, iterations=2)

    region = []
    contours, hierarchy = cv2.findContours(dilation2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        # Drop small blobs that are unlikely to be a full text line.
        if cv2.contourArea(cnt) < 1000:
            continue
        rect = cv2.minAreaRect(cnt)
        print("rect is: ")
        print(rect)
        # np.intp replaces np.int0 (an alias removed in NumPy 2.0).
        box = np.intp(cv2.boxPoints(rect))
        height = abs(box[0][1] - box[2][1])
        width = abs(box[0][0] - box[2][0])
        # Text lines are wide and flat; discard tall, narrow boxes.
        if height > width * 1.3:
            continue
        region.append(box)
        print('box is:', box)
    return region


# Explicit imports instead of the original `from math import *`;
# fabs/sin/cos/radians are needed by RotateDegree further down the file.
from math import atan, pi, fabs, sin, cos, radians


def calcuate_angle(lines_h, length):
    """Estimate the document skew angle (degrees) from line segments.

    Args:
        lines_h: list of ``[[x1, y1, x2, y2]]`` segments; only the first
            10 entries are considered.
        length: minimum SQUARED segment length — note the comparison below
            is against ``dx*dx + dy*dy``, not the euclidean length.

    Returns:
        The negated median of the per-segment angles, each folded into
        [-45, 45] degrees; ``0.0`` when no segment qualifies (the
        original code raised IndexError in that case).
    """
    angle_all = []
    for idx in range(min(10, len(lines_h))):
        for x1, y1, x2, y2 in lines_h[idx]:
            # Skip segments shorter than the (squared) threshold.
            if (x2 - x1) ** 2 + (y2 - y1) ** 2 < length:
                continue
            if y2 == y1:
                angle_line = 90
            else:
                angle_line = atan((x2 - x1) / (y2 - y1)) * 180 / pi
            # Fold into [-45, 45] so near-horizontal and near-vertical
            # segments vote for the same small skew.
            if angle_line > 45:
                angle_line -= 90
            if angle_line < -45:
                angle_line += 90
            angle_all.append(angle_line)
    # Robustness fix: no qualifying segment means no measurable skew.
    if not angle_all:
        return 0.0
    angle_all.sort()
    # The median is robust against a few outlier segments.
    return -angle_all[int(len(angle_all) / 2)]
def RotateDegree(img, degree):
    """Rotate ``img`` counter-clockwise by ``degree`` degrees.

    The canvas is enlarged so the rotated image is not cropped, and the
    uncovered border is filled with white (255, 255, 255).

    Args:
        img: BGR image as a numpy array of shape (H, W, C).
        degree: rotation angle in degrees; positive rotates left.

    Returns:
        The rotated image on the enlarged canvas.
    """
    height, width = img.shape[:2]
    rad = radians(degree)  # hoist the shared conversion
    # New canvas size that fully contains the rotated rectangle.
    heightNew = int(width * fabs(sin(rad)) + height * fabs(cos(rad)))
    widthNew = int(height * fabs(sin(rad)) + width * fabs(cos(rad)))
    matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1)
    # Shift the rotation so the result is centered on the new canvas.
    matRotation[0, 2] += (widthNew - width) / 2
    matRotation[1, 2] += (heightNew - height) / 2
    return cv2.warpAffine(img, matRotation, (widthNew, heightNew),
                          borderValue=(255, 255, 255))


if __name__ == '__main__':
    image = cv2.imread('./asset/0015.jpg')
    # cv2.imread returns None (no exception) when the file is missing.
    if image is None:
        raise FileNotFoundError('./asset/0015.jpg could not be read')
    text_recs = get_box(image)
    print(len(text_recs))
    # Build line segments (x1, y1, x2, y2) from the first edge of each
    # detected box; at most 10, matching calcuate_angle's own cap.
    # (The original debug print of text_recs[1] crashed with IndexError
    # whenever fewer than two boxes were found.)
    lines_h = []
    for i in range(min(10, len(text_recs))):
        print('---:', (text_recs[i][0][0], text_recs[i][0][1]),
              (text_recs[i][1][0], text_recs[i][1][1]))
        lines_h.append([[text_recs[i][0][0], text_recs[i][0][1],
                         text_recs[i][1][0], text_recs[i][1][1]]])
    # Guard: with no detected boxes the median-angle lookup would crash.
    angle = calcuate_angle(lines_h, 100) if lines_h else 0
    print(angle)
    new_img = RotateDegree(image, angle)
    cv2.imwrite('./asset/0015_rotate.jpg', new_img)

2、CTPN

结合Web版的需求,模型只需初始化一次,并且不能和其他模型冲突,所以我改了下:

class CTPN:
    """Keras implementation of the CTPN text detector.

    Builds three views of one network: a single-device training model, an
    (optionally) multi-GPU parallel training model, and a prediction model
    that additionally outputs softmax class probabilities.
    """

    def __init__(self, lr=0.00001, image_channels=3, vgg_trainable=True, weight_path=None, num_gpu=1):
        self.image_channels = image_channels
        # Fully convolutional: height and width stay unspecified.
        self.image_shape = (None, None, image_channels)
        self.vgg_trainable = vgg_trainable
        self.num_gpu = num_gpu
        self.lr = lr
        self.model, self.parallel_model, self.predict_model = self.__build_model()
        if weight_path is not None:
            self.model.load_weights(weight_path)

    def __build_model(self):
        """Assemble the VGG16 + BiGRU RPN; return (train, parallel, predict) models."""
        backbone = VGG16(weights=None, include_top=False, input_shape=self.image_shape)
        backbone.load_weights(vgg_weights_path)
        backbone.trainable = bool(self.vgg_trainable)

        net_input = backbone.input
        features = backbone.get_layer('block5_conv3').output

        rpn = Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu',
                     name='rpn_conv1')(features)
        # Flatten spatial rows so the bidirectional GRU scans each row.
        seq = Lambda(_reshape, output_shape=(None, 512))(rpn)
        recurrent = Bidirectional(GRU(128, return_sequences=True), name='blstm')(seq)
        merged = Lambda(_reshape2, output_shape=(None, None, 256))([recurrent, rpn])
        merged = Conv2D(512, (1, 1), padding='same', activation='relu', name='lstm_fc')(merged)

        # Ten anchors per location, two values each for class / regression.
        cls = Conv2D(10 * 2, (1, 1), padding='same', activation='linear',
                     name='rpn_class_origin')(merged)
        regr = Conv2D(10 * 2, (1, 1), padding='same', activation='linear',
                      name='rpn_regress_origin')(merged)
        cls = Lambda(_reshape3, output_shape=(None, 2), name='rpn_class')(cls)
        cls_prod = Activation('softmax', name='rpn_cls_softmax')(cls)
        regr = Lambda(_reshape3, output_shape=(None, 2), name='rpn_regress')(regr)

        predict_model = Model(net_input, [cls, regr, cls_prod])
        train_model = Model(net_input, [cls, regr])
        parallel_model = multi_gpu_model(train_model, gpus=self.num_gpu) if self.num_gpu > 1 else train_model
        parallel_model.compile(optimizer=Adam(self.lr),
                               loss={'rpn_regress': _rpn_loss_regr, 'rpn_class': _rpn_loss_cls},
                               loss_weights={'rpn_regress': 1.0, 'rpn_class': 1.0})
        return train_model, parallel_model, predict_model

    def train(self, train_data_generator, epochs, **kwargs):
        """Train on a generator; extra kwargs pass straight to fit_generator."""
        self.parallel_model.fit_generator(train_data_generator, epochs=epochs, **kwargs)

    def predict(self, image, output_path=None, mode=1):
        """Detect text lines in ``image`` (file path or BGR array).

        mode 1: draw and show the result (optionally writing output_path);
        mode 2: return ``(text_lines, image)``.
        """
        if type(image) is str:
            # Path given: decode via np.fromfile so non-ASCII paths work.
            src = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_COLOR)
        else:
            src = image
        h, w, c = src.shape
        # The network downsamples by 16, so pad tiny images onto a white
        # canvas of at least 16x16.
        if h < 16 or w < 16:
            pad_h, pad_w = max(16, h), max(16, w)
            canvas = np.ones(shape=(pad_h, pad_w, 3), dtype='uint8') * 255
            canvas[:h, :w, :] = src
            h, w, src = pad_h, pad_w, canvas
        # Zero-center by the dataset mean pixel and add a batch axis.
        batch = np.expand_dims(src - utils.IMAGE_MEAN, axis=0)
        cls, regr, cls_prod = self.predict_model.predict_on_batch(batch)

        anchor = utils.gen_anchor((int(h / 16), int(w / 16)), 16)
        bbox = utils.clip_box(utils.bbox_transfor_inv(anchor, regr), [h, w])

        # Keep anchors whose text probability clears the threshold.
        fg = np.where(cls_prod[0, :, 1] > utils.IOU_SELECT)[0]
        boxes = bbox[fg, :].astype('int32')
        scores = cls_prod[0, fg, 1]

        # Size filter, then non-maximum suppression.
        keep_index = utils.filter_bbox(boxes, 16)
        boxes = boxes[keep_index]
        scores = scores[keep_index]
        scores = np.reshape(scores, (scores.shape[0], 1))
        nmsbox = np.hstack((boxes, scores))
        keep = utils.nms(nmsbox, 1 - utils.IOU_SELECT)
        boxes = boxes[keep]
        scores = scores[keep]

        # Connect surviving proposals into oriented text lines.
        connector = TextProposalConnectorOriented()
        text = connector.get_text_lines(boxes, scores, [h, w]).astype('int32')

        if mode == 1:
            for line in text:
                draw_rect(line, src)
            plt.imshow(src)
            plt.show()
            if output_path is not None:
                cv2.imwrite(output_path, src)
        elif mode == 2:
            return text, src

    def config(self):
        """Serializable hyper-parameters (see save_config / load_config)."""
        return {
            "image_channels": self.image_channels,
            "vgg_trainable": self.vgg_trainable,
            "lr": self.lr,
        }

    @staticmethod
    def save_config(obj, config_path):
        """Dump ``obj.config()`` to a JSON file at ``config_path``."""
        with open(config_path, "w+") as outfile:
            json.dump(obj.config(), outfile)

    @staticmethod
    def load_config(config_path):
        """Load a config dict previously written by ``save_config``."""
        with open(config_path, "r") as infile:
            return dict(json.load(infile))

不过检测出的文本框位置好像不对,还需要排查。

3、EAST

https://github.com/huoyijie/AdvancedEAST

初始化遇到的Bug:https://blog.csdn.net/Maisie_Nan/article/details/103121134

【OCR】文字检测:传统算法、CTPN、EAST相关推荐

  1. OCR文字检测主要算法

    转载:https://www.mayi888.com/archives/60604 文字检测是文字识别过程中的一个非常重要的环节,文字检测的主要目标是将图片中的文字区域位置检测出来,以便于进行后面的文 ...

  2. OCR文字检测框的合并

    OCR文字检测框的合并 项目的github地址:https://github.com/zcswdt/merge_text_boxs 在我们使用文字检测模型的对文本进行检测的时候,可能效果不能如愿以偿, ...

  3. (一)图像文字检测论文:CTPN方法

    论文传送门:Detecting Text in Natural Image with Connectionist Text Proposal Network 1 摘要 我们提出一个新颖的级联文本推荐网 ...

  4. 目标检测——传统算法

    目标检测是计算机视觉中的重要任务之一.本系列博客将总结目标检测的各类算法,包括传统方法.基于CNN的算法(One stage.Two stage).本文主要对传统方法进行概述,大部分思想源自网上的博客 ...

  5. 基础 | OCR文字检测与识别

    作者|Gidi Shperber   编译|AI公园 导读 OCR中的研究,工具和挑战,都在这儿了. 介绍 我喜欢OCR(光学字符识别).对我来说,它代表了数据科学,尤其是计算机视觉的真正挑战.这是一 ...

  6. 利用OCR文字识别+百度算法搜索,玩转冲顶大会、百万英雄、芝士超人等答题赢奖金游戏

    [先上一张效果图]: 一.原理: 其实原理很简单: 1.手机投屏到电脑: 2.截取投屏画面的题目部分,进行识别,得到题目和三个答案: 3.将答案按照一定的算法,进行搜索,得出推荐答案: 4.添加了一些 ...

  7. 利用OCR文字识别+百度算法搜索,玩转冲顶大会、百万英雄、芝士超人等答题赢奖金游戏...

    [先上一张效果图]: 一.原理: 其实原理很简单: 1.手机投屏到电脑: 2.截取投屏画面的题目部分,进行识别,得到题目和三个答案: 3.将答案按照一定的算法,进行搜索,得出推荐答案: 4.添加了一些 ...

  8. 基于深度学习的目标检测及场景文字检测研究

    基于深度学习的目标检测及场景文字检测研究 转载自:https://blog.csdn.net/u013250416/article/details/79591263 一.目标检测与场景文字检测定义 目 ...

  9. OCR文本检测模型—pixel_link

    文章目录 一.PixelLink理论讲解 1.PixelLink整体框架 2.PixelLink网络结构 3.PixelLink实现过程 4.PixelLink检测效果 5.总结 二.代码演示 一.P ...

最新文章

  1. 如何使用canvas绘图
  2. GraphPad Prism绘制时间轴图
  3. DexFile类介绍
  4. 国产编程语言又造假,丢不起这人!
  5. couchbase_具有Rx-Java的Couchbase Java SDK
  6. SQLAlchemy Transactions
  7. Bailian3721 和数【标记】
  8. C++基础:第七章 函数
  9. Active Diretory 全攻略(一)--目录服务
  10. 报名系统 服务器,全国人事考试服务平台系统报名步骤(带报名入口)
  11. Properties与ResourceBundle的基本使用以及区别
  12. 如何在Tungsten Fabric上整合裸金属服务器(附配置验证过程)
  13. 进制的转换 如六进制
  14. Android存入mysql数据库中文乱码解决方法的超级坑
  15. 谈谈自己对微信商城的看法
  16. 一个无名前端的10年前端路
  17. 动态规划解决01背包问题
  18. 如何对接股票交易接口?
  19. 信息论与编码2 期末复习-线性码
  20. 电影-非常人贩(3)

热门文章

  1. typora 公式对齐_三年级数学下册概念及公式,一篇就搞定,给孩子寒假看!
  2. PRML 1.1 多项式曲线拟合
  3. JavaC++题解与拓展——leetcode310.最小高度树【复习链式前向星】
  4. 曾经拥有的,不要忘记。不能得到的,更要珍惜。属于自己的,不要放弃。已经失去的,留作回忆。
  5. JavaEE系统架构师学习路线(基础篇)
  6. Needleman–Wunsch algorithm
  7. 【日常学习】XML文件约束,Tomcat服务器和HTTP协议
  8. [面试日记] 1,时隔五年再次开始面试
  9. 解决SQL_Server2000“以前的某个程序安装已在安装计算机上创建挂起的文件操作。安装程序前必须重新启动计算机”
  10. 行业的英语术语大全之机械工具