Python-OpenCV-- 台式机外接摄像头EAST文本检测+OCR识别

一、代码和训练文件：https://download.csdn.net/download/GGY1102/16681984

利用 OpenCV 的 EAST 文本检测器定位图像中的文本区域。
提取每个文本 ROI，然后使用 OpenCV 和 Tesseract v4 进行文本识别。

二、实际测试代码

import os
import time

import cv2
import numpy as np
import pytesseract
from imutils.object_detection import non_max_suppression
from matplotlib import pyplot as plt
from PIL import Image

# Path of the frozen EAST text-detection graph loaded by cv2.dnn.
EAST_MODEL_PATH = "frozen_east_text_detection.pb"

# Names of the two EAST output layers: the first yields the text
# probabilities, the second the geometry used to derive the boxes.
EAST_LAYER_NAMES = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# Lazily-loaded network shared by every call to text_recognition(), so the
# model file is read once instead of once per camera frame.
_east_net = None


def _load_east_net():
    """Return the EAST network, loading it from disk on first use only."""
    global _east_net
    if _east_net is None:
        print("[INFO] loading EAST text detector...")
        _east_net = cv2.dnn.readNet(EAST_MODEL_PATH)
    return _east_net


def decode_predictions(scores, geometry, min_confidence=0.5):
    """Turn raw EAST detector outputs into boxes and confidences.

    Args:
        scores: score volume from the EAST detector; indexed here as
            ``scores[0, 0, y][x]`` (probability of a text region).
        geometry: geometry volume from the EAST detector; channels 0-3 are
            used as distances to the box edges and channel 4 as the
            rotation angle.
        min_confidence: minimum probability for a cell to be kept.

    Returns:
        A tuple ``(rects, confidences)`` where ``rects`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and ``confidences``
        the matching list of scores.
    """
    # grab the number of rows and columns from the scores volume
    numRows, numCols = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(numRows):
        # per-row slices of the probabilities and of the geometrical data
        # used to derive the bounding boxes that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(numCols):
            # ignore cells whose score is not sufficiently probable
            if scoresData[x] < min_confidence:
                continue

            # the feature maps are 4x smaller than the input image
            offsetX, offsetY = x * 4.0, y * 4.0

            # rotation angle of the prediction and its sine / cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # width and height of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # starting and ending (x, y)-coordinates of the bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    return (rects, confidences)


def text_recognition(image):
    """Detect text in ``image`` with EAST, OCR each region, show the result.

    The frame is resized to 320x320 for detection, the surviving boxes are
    scaled back to the original frame, every region is passed to Tesseract,
    and the annotated frame is displayed in the "Text Detection" window.

    Args:
        image: frame as returned by ``cv2.VideoCapture.read`` (indexed as a
            height x width x channels array).
    """
    # img_path = "images/road-sign-2-768x347.jpg"

    # detector input size; both sides must be multiples of 32
    newW, newH = 320, 320

    # The (optional) amount of padding to add to each ROI border.
    # Try 0.05 for 5% or 0.10 for 10% if the OCR result is incorrect.
    padding = 0.0

    # Tesseract options: (1) the language, (2) --oem 1 to select the LSTM
    # neural-net engine, and (3) --psm 7 to treat each ROI as a single
    # line of text.
    config = ("-l eng --oem 1 --psm 7")  # chi_sim

    orig = image.copy()
    origH, origW = image.shape[:2]

    # ratios used to scale bounding boxes back to the original frame
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    net = _load_east_net()

    # construct a blob from the image and run a forward pass of the model
    # to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(EAST_LAYER_NAMES)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection cost {:.6f} seconds".format(end - start))

    # decode the predictions, then apply non-maxima suppression to keep the
    # most likely text regions and drop weak, overlapping boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # OCR results as ((startX, startY, endX, endY), text) pairs
    results = []

    for (startX, startY, endX, endY) in boxes:
        # scale the box coordinates based on the respective ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # padding deltas in both the x and y directions
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)

        # apply the padding and clamp the box to the frame
        # NOTE(review): the end coordinates grow by 2 * delta while the
        # start coordinates shrink by 1 * delta; kept as in the original.
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # a box lying outside the frame leaves nothing to recognise
        if roi.size == 0:
            continue

        # use Tesseract v4 to recognise the text in the ROI
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((startX, startY, endX, endY), text))

    # sort the results from top to bottom by the box's y-coordinate
    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()

    for ((startX, startY, endX, endY), text) in results:
        # display the text OCR'd by Tesseract
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # strip out non-ASCII text so it can be drawn with OpenCV
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()

        # draw the text and a bounding box around the text region
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 2)
        cv2.putText(output, text, (startX, startY - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

    # show the output image
    cv2.imshow("Text Detection", output)


cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS, 15)

try:
    while True:
        ret, image = cap.read()
        # stop instead of crashing when the camera delivers no frame
        if not ret or image is None:
            print("[INFO] no frame received from camera, stopping")
            break
        text_recognition(image)
        #  cv2.imshow('img', image)
        # press q at any time to quit; mask to the low byte because
        # waitKey may set higher bits on some platforms
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break
finally:
    # release the camera and close the windows even if a frame fails
    cap.release()
    cv2.destroyAllWindows()

参考：https://zhuanlan.zhihu.com/p/64857243

「Python-OpenCV-- 台式机外接摄像头EAST文本检测+OCR识别」相关推荐

机器视觉 OpenCV—python 基于LSTM网络的OCR文本检测与识别
文章目录一.背景与环境搭建二.文本检测与识别一.背景与环境搭建 OpenCV的文本识别流程: OpenCV EAST 文本检测器执行文本检测, 我们提取出每个文本 ROI 并将其输入 Tesse ...
opencv OCR 端到端场景文本检测与识别(webcam_demo) vs2015
版权声明:技术分享,csdn longji https://blog.csdn.net/longji/article/details/78274842 01 资源 OpenCV自带的端到端场景文本检测 ...
Python+OpenCV 调用手机摄像头并实现人脸识别
文章内容: 1.windows 环境下安装 OpenCV 机器视觉环境搭建: 2.基于通过 Python+OpenCV调用手机摄像头并实现人脸检测识别. 目录 1 实验环境 2 实验准备 2.1 下载 ...
Python+OpenCV+dlib汽车驾驶员疲劳驾驶检测
点击查看:Python+OpenCV+dlib汽车驾驶员疲劳驾驶检测文件大小:80M 操作系统:Windows10旗舰版开发工具:Python3.8.OpenCV4.5.dlib 开发语言:.py ...
Python基于CRNN＆CTPN的文本检测系统（源码＆教程）
1.背景文本是人类最伟大和最具影响力的发明之一,是人类智慧的结晶,是人类文化.思想传承的一种基本的表达方式和不可或缺的载体.在21世纪,文本与日常生活密切相关.描述.理解万事万物,表达情感,与他人交 ...
OpenVINO+OpenCV 文本检测与识别
本文转载自OpenCV学堂. 1 模型介绍文本检测模型 OpenVINO支持场景文字检测是基于MobileNet的PixelLink模型,该模型有两个输出,分别是分割输出与bounding Boxe ...
Python+OpenCV：图像快速角点检测算法(FAST Algorithm for Corner Detection)
Python+OpenCV:图像快速角点检测算法(FAST Algorithm for Corner Detection) 理论 Feature Detection using FAST Select ...
Python+OpenCV：图像Harris角点检测(Harris Corner Detection)
Python+OpenCV:图像Harris角点检测(Harris Corner Detection) 理论 corners are regions in the image with large v ...
基于EAST和Tesseract的文本检测与识别
目录导言现实世界问题说明问题陈述业务目标和约束条件可用于文本检测和识别的数据集数据集概述和说明探索性数据分析(EDA) 深度学习时代之前的文本检测方法 EAST(高效精确的场景文本检测 ...

Python-OpenCV-- 台式机外接摄像头EAST文本检测+OCR识别

一、代码和训练文件：https://download.csdn.net/download/GGY1102/16681984

二、实际测试代码

「Python-OpenCV-- 台式机外接摄像头EAST文本检测+OCR识别」相关推荐

最新文章

热门文章