一、代码和训练文件:https://download.csdn.net/download/GGY1102/16681984

  • 利用 OpenCV 的 EAST 文本检测器定位图像中的文本区域。
  • 提取每个文本 ROI,然后使用 OpenCV 和 Tesseract v4 进行文本识别。

二、实际测试代码

from imutils.object_detection import non_max_suppression
from PIL import Image
import numpy as np
import pytesseract
import time
import cv2
from matplotlib import pyplot as plt
import os

# Loaded EAST networks keyed by model path. Parsing the model file is slow, so
# it is done once per path instead of once per camera frame.
_EAST_NET_CACHE = {}


def _get_east_net(model_path):
    """Return the EAST network for *model_path*, loading it on first use only."""
    net = _EAST_NET_CACHE.get(model_path)
    if net is None:
        print("[INFO] loading EAST text detector...")
        net = cv2.dnn.readNet(model_path)
        _EAST_NET_CACHE[model_path] = net
    return net


def decode_predictions(scores, geometry, min_confidence=0.5):
    """Convert the raw EAST output volumes into candidate text boxes.

    Args:
        scores: Score volume, read as ``scores[0, 0, y, x]``; each value is
            the probability that cell (x, y) contains text.
        geometry: Geometry volume, read as ``geometry[0, c, y, x]``. Channels
            0 and 2 are summed to get the box height, channels 1 and 3 are
            summed to get the box width, and channel 4 is the rotation angle.
        min_confidence: Minimum probability a cell must reach to be kept.

    Returns:
        A tuple ``(rects, confidences)`` where ``rects`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and ``confidences``
        holds the matching score of each rectangle.
    """
    # grab the number of rows and columns from the scores volume
    num_rows, num_cols = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(num_rows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding boxes that surround text
        scores_data = scores[0, 0, y]
        x_data0 = geometry[0, 0, y]
        x_data1 = geometry[0, 1, y]
        x_data2 = geometry[0, 2, y]
        x_data3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(num_cols):
            # ignore cells whose score does not have sufficient probability
            if scores_data[x] < min_confidence:
                continue

            # the resulting feature maps are 4x smaller than the input image
            offset_x, offset_y = x * 4.0, y * 4.0

            # rotation angle of the prediction and its sine / cosine
            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # width and height of the bounding box
            h = x_data0[x] + x_data2[x]
            w = x_data1[x] + x_data3[x]

            # starting and ending (x, y)-coordinates of the bounding box
            end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
            end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(scores_data[x])

    return (rects, confidences)


def text_recognition(image):
    """Detect text regions in *image* with EAST and OCR them with Tesseract.

    The recognised strings are printed, and the frame annotated with the
    bounding boxes and text is shown in the "Text Detection" window.

    Args:
        image: Frame as returned by ``cv2.VideoCapture.read`` (an array whose
            first two dimensions are height and width).

    Returns:
        None.
    """
    east_model = "frozen_east_text_detection.pb"

    # width and height fed to the network; both must be multiples of 32
    new_w, new_h = 320, 320

    # The (optional) amount of padding to add to each ROI border.
    # Try 0.05 for 5% or 0.10 for 10% (and so on) if the OCR result is wrong.
    padding = 0.0

    # Tesseract options: (1) the language, (2) --oem 1 to use the LSTM neural
    # net engine, (3) --psm 7 to treat each ROI as a single line of text.
    config = ("-l eng --oem 1 --psm 7")  # chi_sim

    orig = image.copy()
    orig_h, orig_w = image.shape[:2]

    # ratios used to scale bounding boxes back to the original frame size
    ratio_w = orig_w / float(new_w)
    ratio_h = orig_h / float(new_h)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (new_w, new_h))
    (H, W) = image.shape[:2]

    # the two output layers of the EAST detector: the first holds the output
    # probabilities, the second is used to derive the bounding boxes of text
    layer_names = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector (only on the first call)
    net = _get_east_net(east_model)

    # construct a blob from the image and run a forward pass of the model
    # to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layer_names)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection cost {:.6f} seconds".format(end - start))

    # decode the predictions, then apply non-maxima suppression to keep the
    # most likely text regions and drop weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # list of ((startX, startY, endX, endY), text) results
    results = []

    for (start_x, start_y, end_x, end_y) in boxes:
        # scale the bounding box coordinates based on the respective ratios
        start_x = int(start_x * ratio_w)
        start_y = int(start_y * ratio_h)
        end_x = int(end_x * ratio_w)
        end_y = int(end_y * ratio_h)

        # padding deltas in both the x and y directions
        d_x = int((end_x - start_x) * padding)
        d_y = int((end_y - start_y) * padding)

        # apply padding to each side of the bounding box, clamped to the frame
        start_x = max(0, start_x - d_x)
        start_y = max(0, start_y - d_y)
        end_x = min(orig_w, end_x + (d_x * 2))
        end_y = min(orig_h, end_y + (d_y * 2))

        # extract the actual padded ROI
        roi = orig[start_y:end_y, start_x:end_x]
        if roi.size == 0:
            # a degenerate box after clamping has no pixels to recognise
            continue

        # use Tesseract v4 to recognize the text inside the ROI
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((start_x, start_y, end_x, end_y), text))

    # sort the results from top to bottom by the box's starting y-coordinate
    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()

    for ((start_x, start_y, end_x, end_y), text) in results:
        # display the text OCR'd by Tesseract
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # strip out non-ASCII text so it can be drawn with OpenCV
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()

        # draw the text and a bounding box around the text region
        cv2.rectangle(output, (start_x, start_y), (end_x, end_y),
                      (0, 0, 255), 2)
        cv2.putText(output, text, (start_x, start_y - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

    # show the output image
    cv2.imshow("Text Detection", output)


def main():
    """Run text detection + OCR on webcam frames until 'q' is pressed."""
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FPS, 15)
    try:
        while True:
            ret, image = cap.read()
            if not ret:
                # no frame was grabbed (camera missing, unplugged or busy)
                print("[WARN] could not read a frame from the camera")
                break
            text_recognition(image)
            # press 'q' at any time to quit; mask to the low byte because
            # some platforms set higher bits in the key code
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
    finally:
        # always free the camera and close the preview window
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

参考:https://zhuanlan.zhihu.com/p/64857243

Python-OpenCV-- 台式机外接摄像头EAST文本检测+OCR识别 —— 相关推荐

  1. 机器视觉 OpenCV—python 基于LSTM网络的OCR文本检测与识别

    文章目录 一.背景与环境搭建 二.文本检测与识别 一.背景与环境搭建 OpenCV的文本识别流程: OpenCV EAST 文本检测器执行文本检测, 我们提取出每个文本 ROI 并将其输入 Tesse ...

  2. opencv OCR 端到端场景文本检测与识别(webcam_demo) vs2015

    版权声明:技术分享,csdn longji https://blog.csdn.net/longji/article/details/78274842 01 资源 OpenCV自带的端到端场景文本检测 ...

  3. Python+OpenCV 调用手机摄像头并实现人脸识别

    文章内容: 1.windows 环境下安装 OpenCV 机器视觉环境搭建: 2.基于通过 Python+OpenCV调用手机摄像头并实现人脸检测识别. 目录 1 实验环境 2 实验准备 2.1 下载 ...

  4. Python+OpenCV+dlib汽车驾驶员疲劳驾驶检测

    点击查看:Python+OpenCV+dlib汽车驾驶员疲劳驾驶检测 文件大小:80M 操作系统:Windows10旗舰版 开发工具:Python3.8、OpenCV4.5、dlib 开发语言:.py ...

  5. Python基于CRNN&CTPN的文本检测系统(源码&教程)

    1.背景 文本是人类最伟大和最具影响力的发明之一,是人类智慧的结晶,是人类文化、思想传承的一种基本的表达方式和不可或缺的载体。在21世纪,文本与日常生活密切相关。描述、理解万事万物,表达情感,与他人交 ...

  6. OpenVINO+OpenCV 文本检测与识别

    本文转载自OpenCV学堂. 1 模型介绍 文本检测模型 OpenVINO支持场景文字检测是基于MobileNet的PixelLink模型,该模型有两个输出,分别是分割输出与bounding Boxe ...

  7. Python+OpenCV:图像快速角点检测算法(FAST Algorithm for Corner Detection)

    Python+OpenCV:图像快速角点检测算法(FAST Algorithm for Corner Detection) 理论 Feature Detection using FAST Select ...

  8. Python+OpenCV:图像Harris角点检测(Harris Corner Detection)

    Python+OpenCV:图像Harris角点检测(Harris Corner Detection) 理论 corners are regions in the image with large v ...

  9. 基于EAST和Tesseract的文本检测与识别

    目录 导言 现实世界问题 说明 问题陈述 业务目标和约束条件 可用于文本检测和识别的数据集 数据集概述和说明 探索性数据分析(EDA) 深度学习时代之前的文本检测方法 EAST(高效精确的场景文本检测 ...

最新文章

  1. C++ 容器1 vector
  2. 骑士卡:基于Kafka搭建消息中心,上亿消息推送轻松完成
  3. django form和model的一些零碎知识
  4. 数字气泡 php,vue指令如何实现气泡提示(附代码)
  5. 【科普】五分钟快速了解代码复杂度
  6. slf4j的简单用法以及与log4j的区别
  7. centos8安装中文(zh_CN)语言包
  8. 二路单调自增子序列模型【acdream 1216】
  9. (13)ISE14.7bit文件生成mcs文件(FPGA不积跬步101)
  10. 从零开始写项目第八篇【将未完成的项目发布在Tomcat上】
  11. Hbase namespace操作入门
  12. oracle varchar默认长度_Mysql Online DDL之VARCHAR字段扩容探索
  13. mysql支持啥系统_快速的掌握MySQL支持的操作系统
  14. java 虚拟机常用启动参数
  15. Revit二次开发——依据两条平曲线创建一条三维曲线
  16. 【win10升级】我们无法更新系统保留的分区
  17. php pcre回溯攻击,PHP利用PCRE回溯次数限制绕过某些安全限制
  18. upc 卡德加的兔子 线段树 + 矩阵快速幂
  19. 高速收费员招聘考试题计算机题,高速公路面试题和考官题本及答案44套
  20. 无人机通信无线电开放频段

热门文章

  1. AtCoder AGC043D Merge Triplets (DP、组合计数)
  2. python可选参数位置_每个位置参数的可选参数
  3. 通过ITypedList实现数据绑定扁平化
  4. springmvc学习笔记(19)-RESTful支持
  5. 网页设计界面 电脑版设计
  6. Nginx-location配置指南
  7. [转]使用CSS3 Grid布局实现内容优先
  8. zend studio 函数不提醒 小黄图标 小黄标
  9. 二、mysql数据类型
  10. 【转】PBOC3.0和PBOC2.0标准规范异同分析